<<<<<<< HEAD COVID-19 US

Prepare county level data

Read and format prevalence data

df_us_prev <- read_csv('US_prevalence.csv')

df_us_prev <- df_us_prev %>% 
  select(fips, date, rate) %>% 
  mutate(date = as.Date(date, "%d%b%Y")) %>% 
  dplyr::rename(county_fips = fips, 
         rate_day = rate) %>%
  mutate(county_fips = as.character(county_fips)) %>% 
  filter(rate_day >= 0 & rate_day <= 1000) %>%
  filter(date <= '2020-04-15')
  
df_us_prev %>% write_csv('us_prev_fips.csv')
df_us_death <- read_csv('US_prevalence.csv')

df_us_death <-df_us_death %>%
  mutate(date = as.Date(date, "%d%b%Y"), 
         fips = as.character(fips))

df_us_death_0930 <- df_us_death %>% 
  filter(date == '2020-09-30') %>% 
  dplyr::rename(county_fips = fips,
         case_rate_0930 = rate,
         death_rate_0930 = rate_death) %>% 
  select(county_fips, case_rate_0930, death_rate_0930)

df_us_death_0415 <- df_us_death %>% 
  filter(date == '2020-04-15') %>% 
  dplyr::rename(county_fips = fips,
         case_rate_0415 = rate,
         death_rate_0415 = rate_death) %>% 
  select(county_fips, case_rate_0415, death_rate_0415)

df_us_death <- merge(df_us_death_0415, df_us_death_0930)

# save df
df_us_death %>% write_csv('us_death_county_maps.csv')

Read and format personality data

df_us_pers <- read_csv('US_personality_2.csv')

df_us_pers %>% filter(freq >= 50) %>% .$freq %>% sd()
## [1] 3175.839
df_us_pers <- df_us_pers %>%
  dplyr::rename(county_fips = county,
         pers_o = open, 
         pers_c = sci,
         pers_e = extra,
         pers_a = agree,
         pers_n = neuro) %>%
  filter(freq >= 50) %>% 
  select(-county_name, -freq) %>%
  mutate(county_fips = as.character(county_fips))

df_us_pers %>% .$freq %>% mean
## [1] NA

Read and format county level controls

df_us_ctrl <- read.csv('US_controls_2.csv')

df_us_ctrl <- df_us_ctrl %>% select(-county_name) %>%
  rename(county_fips = county,
         airport_dist = airport_distance,
         conservative = republican,
         age = medage,
         healthcare = physician_pc) %>%
  mutate(county_fips = as.character(county_fips), 
         male=male*100,
         tourism=tourism*100,
         manufact=manufact*100,
         academics=academics*100)

df_us_temp <- read_csv("US_controls_weather.csv") %>% 
  filter(month %in% c('Mar')) %>% 
  group_by(fips) %>% summarise(temp = mean(tempc)) %>%
  rename(county_fips = fips) %>% 
  mutate(county_fips = as.character(as.numeric(county_fips)))

df_us_ctrl <- df_us_ctrl %>% plyr::join(df_us_temp, 'county_fips')

Read and format social distancing data FB

fb_files <- list.files('../FB Data/US individual files',
                       '*.csv', full.names = T)

df_us_socdist <- fb_files %>% 
  map(read_csv) %>% 
  bind_rows()

df_us_socdist <- df_us_socdist %>%
  select(-age_bracket, -gender, -baseline_name, 
         -baseline_type, -polygon_name) %>%
  rename(date = ds,
         county_fips = polygon_id,
         socdist_tiles = all_day_bing_tiles_visited_relative_change,
         socdist_single_tile = all_day_ratio_single_tile_users) %>%
  mutate(county_fips = as.character(county_fips))
 
# save object for descritives 
df_us_socdist_desc <- df_us_socdist

Merge prevalence data

# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
                          max(df_us_prev$date), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

# join data frames
df_us_prev <- df_us_prev %>%
  plyr::join(df_us_ctrl, by='county_fips') %>% 
  plyr::join(df_us_pers, by='county_fips') %>%
  merge(df_dates, by='date') %>% 
  arrange(county_fips, date) %>% 
  drop_na()

df_us_prev %>% select(-date, -rate_day, -time) %>% 
  distinct() %>% write_csv('df_us_pers_fips.csv')

df_us_prev %>% select(county_fips) %>% distinct() %>% nrow()
## [1] 2486
# join data frames 
df_us_death <- df_us_death %>%
  plyr::join(df_us_ctrl, by='county_fips') %>% 
  plyr::join(df_us_pers, by='county_fips') %>%
  drop_na()

# save df
df_us_death %>% 
  #select(county_fips, case_rate, death_rate) %>% 
  write_csv('us_death_fips_controls.csv')

Merge social distancing data

# create sequence of dates
date_sequence <- seq.Date(min(df_us_socdist$date),
                          as.Date('2020-04-28'), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

# join data frames 
df_us_socdist <- df_us_socdist %>%
  plyr::join(df_us_ctrl, by='county_fips') %>% 
  plyr::join(df_us_pers, by='county_fips') %>%
  inner_join(df_dates, by='date') %>% 
  arrange(county_fips, date)

fips_complete <- df_us_socdist %>% 
  group_by(county_fips) %>% 
  summarise(n = n()) %>% 
  filter(! n<max(n)) %>% .$county_fips

df_us_socdist <- df_us_socdist %>%
  filter(county_fips %in% fips_complete)

df_us_socdist %>% select(county_fips) %>% distinct() %>% nrow()
## [1] 2582

Control for weekend effect

easter <- seq.Date(as.Date('2020-04-10'), as.Date('2020-04-13'), 1)

df_us_loess <- df_us_socdist %>% 
  mutate(weekday = format(date, '%u')) %>% 
  filter(!weekday %in% c('6','7') | date %in% easter) %>% 
  split(.$county_fips) %>%
  map(~ loess(socdist_single_tile ~ time, data = .)) %>%
  map(predict, 1:max(df_us_socdist$time)) %>% 
  bind_rows() %>% 
  gather(key = 'county_fips', value = 'loess') %>% 
  group_by(county_fips) %>% 
  mutate(time = row_number())

df_us_loess_2 <- df_us_socdist %>% 
  mutate(weekday = format(date, '%u')) %>% 
  filter(!weekday %in% c('6','7') | date %in% easter) %>% 
  split(.$county_fips) %>%
  map(~ loess(socdist_tiles ~ time, data = .)) %>%
  map(predict, 1:max(df_us_socdist$time)) %>% 
  bind_rows() %>% 
  gather(key = 'county_fips', value = 'loess') %>% 
  rename(loess_2 = loess) %>%
  group_by(county_fips) %>% 
  mutate(time = row_number())

df_us_socdist <- df_us_socdist %>% 
  merge(df_us_loess, by=c('county_fips', 'time')) %>% 
  merge(df_us_loess_2, by=c('county_fips', 'time')) %>% 
  mutate(weekday = format(date, '%u')) %>% 
  mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7') | date %in% easter, 
                                            loess, socdist_single_tile),
         socdist_tiles_clean = ifelse(weekday %in% c('6','7') | date %in% easter, 
                                            loess_2, socdist_tiles)) %>%
  arrange(county_fips, time) %>% 
  select(-weekday)

df_us_socdist <- df_us_socdist %>% drop_na() %>% mutate(time = time-1)

Pick cleaned social distancing metrics

df_us_socdist <- df_us_socdist %>% 
  mutate(socdist_single_tile = socdist_single_tile_clean,
         socdist_tiles = socdist_tiles_clean) %>% 
  select(-loess, -loess_2, -socdist_single_tile_clean, -socdist_tiles_clean)

Define outcomes of interest

COVID - Extract onset

# get onset day
df_us_onset_prev <- df_us_prev %>% 
  group_by(county_fips) %>%
  filter(rate_day > 0.1) %>%
  summarise(onset_prev = min(time))

# merge with county data
df_us_onset_prev <- df_us_prev %>%
  select(-date, -time, -rate_day) %>%
  distinct() %>%
  mutate(county_fips = as.character(county_fips)) %>%
  left_join(df_us_onset_prev, by = 'county_fips')

# handle censored data
df_us_onset_prev <- df_us_onset_prev %>%
  mutate(event = ifelse(is.na(onset_prev), 0, 1)) %>%
  mutate(onset_prev = replace_na(onset_prev, as.numeric(diff(range(df_us_prev$date)))+1))

COVID - Extract slopes

# cut time series
df_us_prev <- df_us_prev %>% 
  filter(date > '2020-03-15' & date < '2020-04-15')

# extract slope prevalence
df_us_slope_prev <- df_us_prev %>% 
  split(.$county_fips) %>% 
  map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
  map(coef) %>% 
  map_dbl('time') %>% 
  as.data.frame() %>% 
  rownames_to_column('county_fips') %>% 
  rename(slope_prev = '.')
  
# merge with county data
df_us_slope_prev <- df_us_onset_prev %>% 
  inner_join(df_us_slope_prev, by = 'county_fips')

# get unscaled object for descriptives
df_us_prev_desc <- df_us_slope_prev
# cut time series before onset
df_us_slope_var <- df_us_prev %>% 
  filter(date <= '2020-05-15') %>%
  group_by(county_fips) %>% 
  filter(rate_day > 0.1) %>%
  mutate(time = time-min(time)+1) %>% 
  ungroup() %>%
  filter(time <= 30)

# extract slope prevalence
df_us_slope_var <- df_us_slope_var %>% 
  split(.$county_fips) %>% 
  map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
  map(coef) %>% 
  map_dbl('time') %>% 
  as.data.frame() %>% 
  rownames_to_column('county_fips') %>% 
  rename(slope_prev_var = '.')

# merge with county data
df_us_slope_prev <- df_us_slope_prev %>% 
  left_join(df_us_slope_var, by = 'county_fips') %>%
  mutate(slope_prev_var = replace_na(slope_prev_var, 0))

Social Distancing - Change point analysis

# keep only counties with full data
fips_complete <- df_us_socdist %>% 
  group_by(county_fips) %>% 
  summarise(n = n()) %>% 
  filter(n==max(.$n)) %>% 
  .$county_fips

# run changepoint analysis
df_us_socdist_cpt_results <- df_us_socdist %>% 
  select(county_fips, socdist_single_tile) %>%
  filter(county_fips %in% fips_complete) %>% 
  split(.$county_fips) %>%
  map(~ cpt.meanvar(as.vector(.$socdist_single_tile),
                    #penalty = 'Asymptotic',
                    class=TRUE,
                    param.estimates=TRUE,
                    Q=1,
                    test.stat = 'Normal'))

df_us_socdist_cpt_results_2 <- df_us_socdist %>% 
  select(county_fips, socdist_tiles) %>%
  filter(county_fips %in% fips_complete) %>% 
  split(.$county_fips) %>%
  map(~ cpt.meanvar(as.vector(.$socdist_tiles),
                    #penalty = 'Asymptotic',
                    class=TRUE,
                    param.estimates=TRUE,
                    Q=1,
                    test.stat = 'Normal'))

# calculate change point
df_us_socdist_cpt_day <- df_us_socdist_cpt_results %>% 
  map(cpts) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(cpt_day_socdist = '.') %>%
  rownames_to_column('county_fips')

df_us_socdist_cpt_day_2 <- df_us_socdist_cpt_results_2 %>% 
  map(cpts) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(cpt_day_socdist_2 = '.') %>%
  rownames_to_column('county_fips')

# calculate mean differences
df_us_socdist_cpt_mean_diff <- df_us_socdist_cpt_results %>% 
  map(param.est) %>% 
  map(~ .$mean) %>% 
  map(~ .[2]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(mean_diff_socdist = '.') %>%
  rownames_to_column('county_fips')

df_us_socdist_cpt_mean_diff_2 <- df_us_socdist_cpt_results_2 %>% 
  map(param.est) %>% 
  map(~ .$mean) %>% 
  map(~ -.[2]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(mean_diff_socdist_2 = '.') %>%
  rownames_to_column('county_fips')

# calculate means
df_us_socdist_mean <- df_us_socdist %>%
  group_by(county_fips) %>%
  summarise(mean_socdist = mean(socdist_single_tile))

df_us_socdist_mean_2 <- df_us_socdist %>%
  group_by(county_fips) %>%
  summarise(mean_socdist_2 = -mean(socdist_tiles))

# merge with county data
df_us_cpt_socdist <- df_us_socdist %>% 
  select(-time, -date, -socdist_single_tile, -socdist_tiles) %>%
  distinct() %>% 
  left_join(df_us_socdist_cpt_day, by='county_fips') %>%
  left_join(df_us_socdist_cpt_day_2, by='county_fips') %>%
  left_join(df_us_socdist_cpt_mean_diff, by='county_fips') %>%
  left_join(df_us_socdist_cpt_mean_diff_2, by='county_fips') %>%
  left_join(df_us_socdist_mean, by='county_fips') %>%
  left_join(df_us_socdist_mean_2, by='county_fips') %>%
  left_join(select(df_us_onset_prev, county_fips, onset_prev), by='county_fips') %>%
  left_join(select(df_us_slope_prev, county_fips, slope_prev), by='county_fips') 

# handle censored data
df_us_cpt_socdist <- df_us_cpt_socdist %>% 
  mutate(cpt_day_socdist = ifelse(is.na(cpt_day_socdist), as.numeric(diff(range(df_us_socdist$date))), cpt_day_socdist)) %>% 
  mutate(event = ifelse(cpt_day_socdist >= as.numeric(diff(range(df_us_socdist$date))), 0, 1))

Remove incomplete cases

df_us_prev_unscaled <- inner_join(df_us_slope_prev, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_death_unscaled <- inner_join(df_us_death, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_socdist_unscaled <- inner_join(df_us_cpt_socdist, df_us_slope_prev %>% select(county_fips), by='county_fips')

fips_list <- df_us_prev_unscaled %>% select(county_fips)

df_us_prev_unscaled %>% merge(df_us_death_unscaled %>% select(county_fips:death_rate_0930))  %>%
  merge(df_us_socdist_unscaled %>% select(county_fips, cpt_day_socdist_2, mean_diff_socdist_2)) %>% 
  select(-event) %>% write_csv('us_all_variables.csv')

Rescale Data

df_us_prev_scaled <- df_us_prev_unscaled %>% 
  mutate_at(vars(-county_fips, -event), scale)
df_us_death_scaled <- df_us_death_unscaled %>% 
  mutate_at(vars(-county_fips), scale)
df_us_socdist_scaled <- df_us_socdist_unscaled %>%
  mutate_at(vars(-county_fips, -event), scale)

Calculate spatial lags

# create UDF to calculate lagged variables
calc_lags <- function(df, weights, cols){
  
  cols_only <- df %>% select(cols)
  cols_lag <- weights %*% as.matrix(cols_only) %>% 
  as.matrix() %>% as.data.frame()
names(cols_lag) <- paste0(names(cols_lag), '_lag')

return(cols_lag)
}
# read_weight matrix 
weight_mat_norm <- read_csv('df_us_spatial_weights_matrix.csv')
## 
## -- Column specification --------------------------------------------------------------------
## cols(
##   .default = col_double()
## )
## i Use `spec()` for the full column specifications.
weight_mat_norm <- weight_mat_norm %>% select(-county_fips) %>% as.matrix()
# generate spatially lagged y 
y_only <- df_us_prev_scaled %>% select(onset_prev,slope_prev,slope_prev_var) %>% names()
y_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, y_only)

# generate spatially lagged X
X_only <- df_us_prev_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, X_only)

# bind new variables to df
df_us_prev_scaled <- cbind(df_us_prev_scaled, y_lag, X_lag)
# generate spatially lagged y 
y_only <- df_us_death_scaled %>% select(case_rate_0415:death_rate_0930) %>% names()
y_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, y_only)

# generate spatially lagged X
X_only <- df_us_death_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, X_only)

# bind new variables to df
df_us_death_scaled <- cbind(df_us_death_scaled, y_lag, X_lag)
# generate spatially lagged y 
y_only <- df_us_socdist_scaled %>% select(cpt_day_socdist_2,mean_diff_socdist_2) %>% names()
y_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, y_only)

# generate spatially lagged X
X_only <- df_us_socdist_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, X_only)

# bind new variables to df
df_us_socdist_scaled <- cbind(df_us_socdist_scaled, y_lag, X_lag)
write_csv(df_us_prev_scaled, 'df_us_slope_prev.csv')
write_csv(df_us_death_scaled, 'df_us_death_scaled.csv')
write_csv(df_us_socdist_scaled, 'df_us_cpt_socdist.csv')

Run Specification Curve Analysis

# define function to calculate specification curve analyses for all traits
spec_calculate <- function(df, y, model, controls, all.comb = T){
  
  spec_results_o <- run_specs(df = df, 
                       y = c(y), x = c("pers_o"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_c <- run_specs(df = df, 
                       y = c(y), x = c("pers_c"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_e <- run_specs(df = df, 
                       y = c(y), x = c("pers_e"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_a <- run_specs(df = df, 
                       y = c(y), x = c("pers_a"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_n <- run_specs(df = df, 
                       y = c(y), x = c("pers_n"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results <- list(spec_results_o, spec_results_c, spec_results_e, 
                      spec_results_a, spec_results_n)
  
  names(spec_results) <- list('spec_results_o', 'spec_results_c', 'spec_results_e', 
                      'spec_results_a', 'spec_results_n')

  return(spec_results)
  }
# adapted from specr package code
format_results <- function(df, var, null = 0, desc = FALSE) {

  # rank specs
  if (isFALSE(desc)) {
    df <- df %>%
      dplyr::arrange(!! var)
  } else {
    df <- df %>%
      dplyr::arrange(desc(!! var))
  }

  # create rank variable and color significance
  df <- df %>%
    dplyr::mutate(specifications = 1:nrow(df),
                  color = case_when(conf.low > null ~ "#377eb8",
                                    conf.high < null ~ "#e41a1c",
                                    TRUE ~ "darkgrey"))
  return(df)
}


# define function to plot single specification curve
plot_curves <- function(results, filename, hr=F){
  
  file_path_eps <- paste0("../../Plots/US/", filename,".eps")
  file_path_pdf <- paste0("../../Plots/US/", filename,".pdf")
  file_path_png <- paste0("../../Plots/US/", filename,".png")

  if(hr==F){
    
    results %>%
    format_results(var = .$estimate, null = 0, desc = F) %>%
    ggplot(aes(x = specifications,
               y = estimate,
               ymin = conf.low,
               ymax = conf.high,
               color = color)) +
    geom_point(aes(color = color),
               size = 1) +
    theme_minimal() +
    scale_color_identity() +
    theme(strip.text = element_blank(),
          axis.line = element_line("black", size = .5),
          legend.position = "none",
          panel.spacing = unit(.75, "lines"),
          axis.text = element_text(colour = "black")) +
    labs(x = "") +
      geom_pointrange(alpha = 0.05,
                      size = .6,
                      fatten = 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    labs(x = "", y = "standarized coefficient") + 
      coord_fixed(ratio = 2000, ylim = c(-0.4, 0.4)) +
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black")) +
      ggsave(file=file_path_eps, device = 'eps')+
      ggsave(file=file_path_pdf, device = 'pdf')+
      ggsave(file=file_path_png, device = 'png')
    
  }else{
    
    results %>%
    format_results(var = .$estimate, null = 1, desc = F) %>%
    ggplot(aes(x = specifications,
               y = estimate,
               ymin = conf.low,
               ymax = conf.high,
               color = color)) +
    geom_point(aes(color = color),
               size = 1) +
    theme_minimal() +
    scale_color_identity() +
    theme(strip.text = element_blank(),
          axis.line = element_line("black", size = .5),
          legend.position = "none",
          panel.spacing = unit(.75, "lines"),
          axis.text = element_text(colour = "black")) +
    labs(x = "") +
      geom_pointrange(alpha = 0.05,
                      size = .6,
                      fatten = 1) +
    geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
    labs(x = "", y = "standarized coefficient") + 
      coord_fixed(ratio = 2000, ylim = c(0.6, 1.4)) +
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(), 
      axis.line = element_line(colour = "black")) +
      ggsave(file=file_path_eps, device = 'eps')+
      ggsave(file=file_path_pdf, device = 'pdf')+
      ggsave(file=file_path_png, device = 'png')  
    }
  
}

# define function to plot all curves in list
plot_all_curves <- function(ls, analysis, hr=F){
  
  pers <- c('o', 'c', 'e', 'a', 'n')
  filenames <- as.list(paste0(analysis, '_', pers))
  hr <- as.list(rep(hr, 5))
  pmap(list(ls, filenames, hr), plot_curves)
}
# define function to calculate summary stats for single specification curve
calc_summary <- function(df){
  
  dft <- df %>% select(estimate:fit_nobs)
  dft <- dft %>% mutate(significant = as.numeric(p.value<0.05),
                        positive = as.numeric(statistic>0), 
                        negative = as.numeric(statistic<0), 
                        significant_positive = as.numeric(p.value<0.05 & statistic>0),
                        significant_negative = as.numeric(p.value<0.05 & statistic<0))
  
  mean_temp <- dft %>% map_if(is.numeric, mean, .else=NULL) %>% as.data.frame()
  sd_temp <- dft %>% map_if(is.numeric, sd, .else=NULL) %>% as.data.frame()
  
  df_temp <- rbind(mean_temp, sd_temp)
  row.names(df_temp) <- c('mean', 'sd')
  
  return(df_temp)
}

# define function to calculate summary stats for all curves in list
calc_all_summaries <- function(ls){
  
  ls <- ls %>% map(calc_summary)
  names(ls) <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')
  return(ls)
}
coef_to_hr <- function(df){
  
  df %>% mutate(conf.low = exp(estimate-1.96*std.error),
           conf.high = exp(estimate+1.96*std.error),
           estimate = exp(estimate)) 
}

filter_socdist <- function(df){
  
  df %>% filter(str_detect(controls, 'onset_prev') & 
                  str_detect(controls, 'slope_prev'))
}
covariates <- c("airport_dist", "conservative", 'male', 'age', 'popdens',
                'manufact', 'tourism', 'academics', 'medinc', 'healthcare',
                'temp')

Predict Onset

cox_model <- function(formula, data){
  formula <- as.formula(formula)
  coxph(formula = formula, data = data)}
cox_onset_prev <- spec_calculate(df = df_us_prev_scaled, 
               y = "Surv(onset_prev, event)", 
               model = "cox_model", 
               controls = covariates %>% append('onset_prev_lag'),
               all.comb = T)

cox_onset_prev_hr <- cox_onset_prev %>% map(coef_to_hr)

calc_all_summaries(cox_onset_prev_hr)
## $pers_o
##        estimate   std.error statistic     p.value   conf.low  conf.high fit_n
## mean 1.16172145 0.026014376  5.766790 0.007433825 1.10404476 1.22241719  2366
## sd   0.06206793 0.001120517  2.218639 0.040648218 0.06034861 0.06388357     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          638.2887    2.434685e-35         846.4471
## sd            0          136.8117    1.056543e-33         203.5470
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   1.186354e-35           769.6777     9.006769e-36                   NA
## sd     4.978832e-34           172.8896     3.775740e-34                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23515525         0.9999908      0.68063283
## sd                   NA    0.04511866         0.0000000      0.01744978
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0065112174 -13403.98321 26821.9664 26860.8760     2366
## sd                0.0002305437     68.40587   134.2635   127.4085        0
##      significant positive negative significant_positive significant_negative
## mean   0.9633789        1        0            0.9633789                    0
## sd     0.1878526        0        0            0.1878526                    0
## 
## $pers_c
##        estimate   std.error statistic     p.value   conf.low  conf.high fit_n
## mean 1.14595693 0.023675413  5.720862 0.001312684 1.09398662 1.20039740  2366
## sd   0.04204126 0.000533867  1.536933 0.009826545 0.03991021 0.04431706     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          634.9587    6.334368e-13         836.1058
## sd            0          158.0395    4.053783e-11         223.5635
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   5.061211e-13           753.4356     4.799701e-13                   NA
## sd     3.238962e-11           188.4106     3.071593e-11                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23363277         0.9999908      0.67911602
## sd                   NA    0.05275286         0.0000000      0.02191225
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0064685052 -13405.64817 26825.2963 26864.2060     2366
## sd                0.0002280484     79.01976   155.5847   148.9729        0
##      significant positive negative significant_positive significant_negative
## mean  0.99414062        1        0           0.99414062                    0
## sd    0.07633129        0        0           0.07633129                    0
## 
## $pers_e
##       estimate    std.error statistic   p.value   conf.low  conf.high fit_n
## mean 1.0053968 0.0230413250 0.2407622 0.5618723 0.96101875 1.05182497  2366
## sd   0.0210153 0.0004509163 0.9189734 0.2847874 0.02088586 0.02111908     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          601.0713    3.043490e-07         811.6531
## sd            0          156.6765    1.947733e-05         226.5546
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   3.236460e-07           731.9152     3.223853e-07                   NA
## sd     2.071186e-05           193.9189     2.063117e-05                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.22261503         0.9999908      0.67472760
## sd                   NA    0.05272031         0.0000000      0.02245513
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0065180540 -13422.59188 26859.1838 26898.0934     2366
## sd                0.0002496992     78.33827   154.1384   147.2827        0
##      significant  positive  negative significant_positive significant_negative
## mean  0.06713867 0.5056152 0.4943848           0.06713867                    0
## sd    0.25029256 0.5000295 0.5000295           0.25029256                    0
## 
## $pers_a
##        estimate    std.error statistic    p.value   conf.low  conf.high fit_n
## mean 1.12712077 0.0241346132  4.911333 0.02630132 1.07503821 1.18172942  2366
## sd   0.05222035 0.0007992203  1.927298 0.11581101 0.04966844 0.05496148     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          627.6168    7.129527e-12         832.9647
## sd            0          157.1791    4.562814e-10         223.8587
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   5.323420e-12            751.675     5.166855e-12                   NA
## sd     3.406905e-10            190.272     3.306702e-10                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23126981         0.9999908      0.67999298
## sd                   NA    0.05263276         0.0000000      0.02169922
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0064707427 -13409.31914 26832.6383 26871.5479     2366
## sd                0.0002249616     78.58953   154.7361   148.1604        0
##      significant   positive    negative significant_positive
## mean   0.9313965 0.99462891 0.005371094            0.9313965
## sd     0.2528096 0.07309959 0.073099587            0.2528096
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##        estimate    std.error statistic     p.value   conf.low  conf.high fit_n
## mean 0.87994919 0.0254283747 -5.139354 0.006040369 0.83711030 0.92498283  2366
## sd   0.04229444 0.0008502208  2.109454 0.019840097 0.03905764 0.04576956     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          631.3160    1.042710e-34         830.4507
## sd            0          135.0754    6.242899e-33         207.2895
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   8.175495e-34           746.0484     4.523746e-34                   NA
## sd     4.972358e-32           170.8610     2.735136e-32                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23293232         0.9999908      0.67921943
## sd                   NA    0.04458783         0.0000000      0.01770616
##      fit_std.error.concordance  fit_logLik   fit_AIC    fit_BIC fit_nobs
## mean              0.0064412561 -13407.4695 26828.939 26867.8487     2366
## sd                0.0001869198     67.5377   132.492   125.5353        0
##      significant positive negative significant_positive significant_negative
## mean   0.9619141        0        1                    0            0.9619141
## sd     0.1914271        0        0                    0            0.1914271
plot_all_curves(cox_onset_prev_hr, 'cox_onset_prev_hr', hr = T)
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

cox_onset_prev_ctrl <- coxph(Surv(onset_prev, event) ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + onset_prev_lag, 
                             data = df_us_prev_scaled)

summary(cox_onset_prev_ctrl)
## Call:
## coxph(formula = Surv(onset_prev, event) ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + onset_prev_lag, data = df_us_prev_scaled)
## 
##   n= 2366, number of events= 1917 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## airport_dist   -0.06929   0.93306  0.02959 -2.341 0.019219 *  
## conservative   -0.26778   0.76508  0.02758 -9.709  < 2e-16 ***
## male           -0.10249   0.90259  0.02677 -3.828 0.000129 ***
## age            -0.11380   0.89244  0.02439 -4.666 3.07e-06 ***
## popdens         0.03785   1.03857  0.02364  1.601 0.109371    
## manufact        0.10299   1.10848  0.02806  3.671 0.000242 ***
## tourism         0.05239   1.05379  0.02662  1.968 0.049029 *  
## academics       0.17781   1.19460  0.04482  3.968 7.26e-05 ***
## medinc          0.33067   1.39190  0.03620  9.134  < 2e-16 ***
## healthcare      0.11392   1.12067  0.02693  4.230 2.34e-05 ***
## temp            0.32833   1.38865  0.03005 10.926  < 2e-16 ***
## onset_prev_lag -0.31413   0.73042  0.04236 -7.416 1.21e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## airport_dist      0.9331     1.0717    0.8805    0.9888
## conservative      0.7651     1.3071    0.7248    0.8076
## male              0.9026     1.1079    0.8565    0.9512
## age               0.8924     1.1205    0.8508    0.9361
## popdens           1.0386     0.9629    0.9916    1.0878
## manufact          1.1085     0.9021    1.0492    1.1712
## tourism           1.0538     0.9490    1.0002    1.1102
## academics         1.1946     0.8371    1.0941    1.3043
## medinc            1.3919     0.7184    1.2966    1.4942
## healthcare        1.1207     0.8923    1.0630    1.1814
## temp              1.3886     0.7201    1.3092    1.4729
## onset_prev_lag    0.7304     1.3691    0.6722    0.7937
## 
## Concordance= 0.709  (se = 0.006 )
## Likelihood ratio test= 896.6  on 12 df,   p=<2e-16
## Wald test            = 1059  on 12 df,   p=<2e-16
## Score (logrank) test = 1203  on 12 df,   p=<2e-16

Predict Slopes

# fixed time windows
lm_slope_prev <- spec_calculate(df = df_us_prev_scaled, 
               y = "slope_prev", 
               model = "lm", 
               controls = covariates %>% append('slope_prev_lag'),
               all.comb = T)

calc_all_summaries(lm_slope_prev)
## $pers_o
##        estimate    std.error statistic   p.value   conf.low conf.high
## mean 0.06650710 0.0218723943  3.051989 0.1060491 0.02361598 0.1093982
## sd   0.03848726 0.0007661755  1.803760 0.2188002 0.03867878 0.0383537
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean    0.16877711        0.16632951 0.9128307      70.25183 1.477756e-24
## sd      0.03743269        0.03708676 0.0202677      14.16632 5.895233e-23
##        fit_df  fit_logLik   fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3136.82893 6291.6579 6343.57847    1965.8421     2358.000000
## sd   1.732262    53.08913  103.6386   96.63557      88.5283        1.732262
##      fit_nobs significant  positive   negative significant_positive
## mean     2366   0.7268066 0.9838867 0.01611328            0.7268066
## sd          0   0.4456537 0.1259266 0.12592662            0.4456537
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##        estimate    std.error statistic    p.value   conf.low  conf.high
## mean 0.08159129 0.0195674938  4.182360 0.01737430 0.04322001 0.11996257
## sd   0.02905970 0.0004474735  1.515199 0.06422916 0.02934306 0.02880029
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17120959        0.16877025 0.91146684      71.33624 8.942856e-10
## sd      0.03958576        0.03924605 0.02140208      15.26681 4.423700e-08
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3133.22547 6284.4509 6336.3715   1960.08931     2358.000000
## sd   1.732262    55.91484  109.2984  102.3039     93.62033        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9228516        1        0            0.9228516
## sd          0   0.2668594        0        0            0.2668594
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.00674942 0.0191251166 0.3431732 0.6112414 -0.03075437 0.04425321
## sd   0.01393104 0.0004257969 0.7179348 0.2737243  0.01343339 0.01445982
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.16442606        0.16196752 0.91517150      67.74067 6.606324e-07
## sd      0.04118872        0.04084978 0.02221979      15.14588 3.531075e-05
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3142.77640 6303.5528 6355.4734   1976.13237     2358.000000
## sd   1.732262    57.88175  113.2271  106.2052     97.41132        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.02978516 0.6547852 0.3452148           0.02978516
## sd          0  0.17001487 0.4754963 0.4754963           0.17001487
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic     p.value   conf.low  conf.high
## mean 0.11095509 0.0199702382  5.583480 0.006947667 0.07179404 0.15011614
## sd   0.03633964 0.0005199555  1.888186 0.042481328 0.03686347 0.03583717
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17639488        0.17397035 0.90861824      74.12927 4.806967e-13
## sd      0.03882516        0.03848915 0.02103938      15.64261 2.739705e-11
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.83790 6269.6758 6321.5964   1947.82611     2358.000000
## sd   1.732262    55.10243  107.6935  100.7668     91.82149        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9714355        1        0            0.9714355
## sd          0   0.1665992        0        0            0.1665992
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic   p.value    conf.low   conf.high
## mean -0.06960205 0.0203062105 -3.442200 0.1089957 -0.10942193 -0.02978217
## sd    0.03665124 0.0004166468  1.838027 0.2464262  0.03637535  0.03694315
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.16969364        0.16724876 0.91232268      70.72756 9.620287e-22
## sd      0.03779965        0.03745861 0.02046116      14.60225 5.487729e-20
##        fit_df fit_logLik   fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3135.5009 6289.0018 6340.92240   1963.67454     2358.000000
## sd   1.732262    53.5602  104.6036   97.66964     89.39616        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.8081055 0.02587891 0.9741211                    0
## sd          0   0.3938387 0.15879340 0.1587934                    0
##      significant_negative
## mean            0.8081055
## sd              0.3938387
plot_all_curves(lm_slope_prev, 'lm_slope_prev')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_slope_prev_ctrl <- lm(slope_prev ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + slope_prev_lag, 
                             data = df_us_prev_scaled)

summary(lm_slope_prev_ctrl)
## 
## Call:
## lm(formula = slope_prev ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + slope_prev_lag, data = df_us_prev_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9236 -0.4960 -0.2000  0.2154  6.0413 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.003469   0.017857  -0.194  0.84600    
## airport_dist    0.008823   0.021113   0.418  0.67606    
## conservative   -0.247126   0.022130 -11.167  < 2e-16 ***
## male           -0.080222   0.019710  -4.070 4.85e-05 ***
## age            -0.052478   0.019239  -2.728  0.00642 ** 
## popdens         0.139040   0.020596   6.751 1.85e-11 ***
## manufact        0.006652   0.022098   0.301  0.76342    
## tourism        -0.008209   0.020752  -0.396  0.69245    
## academics      -0.084176   0.038229  -2.202  0.02777 *  
## medinc          0.263901   0.030260   8.721  < 2e-16 ***
## healthcare      0.041049   0.023823   1.723  0.08500 .  
## temp            0.219069   0.023035   9.510  < 2e-16 ***
## slope_prev_lag  0.303142   0.031452   9.638  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8684 on 2353 degrees of freedom
## Multiple R-squared:  0.2497, Adjusted R-squared:  0.2458 
## F-statistic: 65.25 on 12 and 2353 DF,  p-value: < 2.2e-16
# variable time windows
lm_slope_prev_var <- spec_calculate(df = df_us_prev_scaled, 
               y = "slope_prev_var", 
               model = "lm", 
               controls = covariates %>% append('slope_prev_var_lag'),
               all.comb = T)

calc_all_summaries(lm_slope_prev_var)
## $pers_o
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.01843360 0.0228448236 0.8092564 0.3374739 -0.02636443 0.06323163
## sd   0.03179557 0.0008021976 1.4119922 0.2928829  0.03186131 0.03180759
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09368869        0.09101223 0.95330359     35.311428 4.358225e-09
## sd      0.02737264        0.02697465 0.01414535      8.558473 1.601068e-07
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3239.79451 6497.58901 6549.50962    2143.4263     2358.000000
## sd   1.732262    35.73047   69.01074   62.48346      64.7363        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.2021484 0.7170410 0.2829590            0.1867676
## sd          0   0.4016514 0.4504917 0.4504917            0.3897724
##      significant_negative
## mean           0.01538086
## sd             0.12307716
## 
## $pers_c
##        estimate    std.error statistic     p.value   conf.low  conf.high
## mean 0.09199460 0.0203745098  4.529303 0.003265235 0.05204079 0.13194841
## sd   0.02635443 0.0003465441  1.337233 0.009768975 0.02675594 0.02596451
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.10121874        0.09856375 0.94935036     38.710210 2.822507e-11
## sd      0.02521521        0.02480452 0.01304579      8.221666 1.221468e-09
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3230.00003 6478.00006 6529.92067   2125.61769     2358.000000
## sd   1.732262    33.10894   63.71753   57.07268     59.63398        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9880371        1        0            0.9880371
## sd          0   0.1087321        0        0            0.1087321
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##          estimate    std.error  statistic   p.value     conf.low  conf.high
## mean -0.007067026 0.0199332406 -0.3585409 0.6301136 -0.046155523 0.03202147
## sd    0.010169162 0.0002821194  0.5126193 0.2278677  0.009878615 0.01048088
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09279622        0.09011762 0.95376562     34.846983 0.0001193256
## sd      0.02829475        0.02789763 0.01461407      8.898946 0.0044740979
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3240.92388 6499.84776 6551.76837   2145.53694     2358.000000
## sd   1.732262    36.85796   71.26061   64.69776     66.91708        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366           0 0.2226562 0.7773438                    0
## sd          0           0 0.4160802 0.4160802                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic    p.value   conf.low  conf.high
## mean 0.11504837 0.0208132613  5.555493 0.00127825 0.07423418 0.15586257
## sd   0.03195153 0.0004849158  1.619015 0.00604934 0.03262194 0.03129565
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.10543597        0.10279285 0.94712809     40.680990 8.095085e-15
## sd      0.02407654        0.02366633 0.01247244      8.311559 3.982934e-13
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3224.47345 6466.94691 6518.86751   2115.64393     2358.000000
## sd   1.732262    31.74214   60.99344   54.40905     56.94102        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366  0.99584961        1        0           0.99584961
## sd          0  0.06429754        0        0           0.06429754
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic   p.value    conf.low    conf.high
## mean -0.04812700 0.0211994748 -2.285890 0.1692657 -0.08969855 -0.006555458
## sd    0.03063361 0.0004145531  1.471557 0.2877701  0.03022191  0.031061141
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09553382        0.09286226 0.95234100     36.196044 9.356756e-12
## sd      0.02627732        0.02587729 0.01357951      8.357229 3.360019e-10
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3237.42430 6492.84860 6544.76920   2139.06251     2358.000000
## sd   1.732262    34.34926   66.24897   59.74882     62.14587        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.6511230 0.1003418 0.8996582                    0
## sd          0   0.4766732 0.3004919 0.3004919                    0
##      significant_negative
## mean            0.6511230
## sd              0.4766732
plot_all_curves(lm_slope_prev_var, 'lm_slope_prev_var')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_slope_prev_var_ctrl <- lm(slope_prev_var ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + slope_prev_var_lag, 
                             data = df_us_prev_scaled)

summary(lm_slope_prev_var_ctrl)
## 
## Call:
## lm(formula = slope_prev_var ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + slope_prev_var_lag, data = df_us_prev_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1372 -0.5606 -0.2239  0.3000  9.4699 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.002831   0.018948  -0.149 0.881224    
## airport_dist        0.021432   0.022402   0.957 0.338819    
## conservative       -0.223392   0.023463  -9.521  < 2e-16 ***
## male               -0.070245   0.020914  -3.359 0.000795 ***
## age                -0.060962   0.020410  -2.987 0.002847 ** 
## popdens             0.111035   0.021855   5.081 4.06e-07 ***
## manufact            0.037805   0.023416   1.614 0.106557    
## tourism            -0.026901   0.022017  -1.222 0.221893    
## academics          -0.131707   0.040554  -3.248 0.001180 ** 
## medinc              0.213479   0.032092   6.652 3.58e-11 ***
## healthcare          0.032853   0.025277   1.300 0.193814    
## temp                0.222065   0.024472   9.074  < 2e-16 ***
## slope_prev_var_lag  0.235435   0.034511   6.822 1.14e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9214 on 2353 degrees of freedom
## Multiple R-squared:  0.1553, Adjusted R-squared:  0.151 
## F-statistic: 36.04 on 12 and 2353 DF,  p-value: < 2.2e-16

Predict Cases

lm_cases_0415 <- spec_calculate(df = df_us_death_scaled, 
               y = "case_rate_0415", 
               model = "lm", 
               controls = covariates %>% append('case_rate_0415_lag'),
               all.comb = T)

calc_all_summaries(lm_cases_0415)
## $pers_o
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.04876701 0.0227720892  2.150331 0.1735715 0.004111615 0.09342241
## sd   0.03165959 0.0007998025  1.423872 0.2427764 0.031873299 0.03152256
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09943230        0.09677125 0.95027531      38.13421 1.473910e-15
## sd      0.02766227        0.02732592 0.01433976      10.44299 4.683503e-14
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.25867 6482.51733 6534.43794   2129.84261     2358.000000
## sd   1.732262    36.16042   70.17978   64.65003     65.42127        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.4975586 0.9543457 0.0456543            0.4975586
## sd          0   0.5000551 0.2087597 0.2087597            0.5000551
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.04262502 0.0203947736  2.104424 0.1466779 0.002631474 0.08261857
## sd   0.02066561 0.0004616577  1.037216 0.2339358 0.021205973 0.02015145
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09892688        0.09626521 0.95052224      37.78673 2.060162e-05
## sd      0.03004164        0.02971634 0.01556665      11.45476 9.415328e-04
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.82626 6483.65252 6535.57312   2131.03793     2358.000000
## sd   1.732262    39.13348   76.15729   70.67325     71.04847        1.732262
##      fit_nobs significant  positive   negative significant_positive
## mean     2366   0.6054688 0.9758301 0.02416992            0.6054688
## sd          0   0.4888094 0.1535952 0.15359524            0.4888094
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##         estimate    std.error statistic   p.value     conf.low   conf.high
## mean 0.012073121 0.0198842257 0.6045850 0.5663104 -0.026919260 0.051065502
## sd   0.009551023 0.0003069414 0.4745963 0.2407368  0.009344157 0.009790576
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09705206        0.09438499 0.95151078      36.93931 0.0000119453
## sd      0.03006737        0.02973723 0.01556634      11.25001 0.0005722379
##        fit_df  fit_logLik   fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3235.28615 6488.5723 6540.49291   2135.47188     2358.000000
## sd   1.732262    39.11061   76.0876   70.52753     71.10934        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.01464844 0.8945312 0.1054688           0.01464844
## sd          0  0.12015567 0.3071940 0.3071940           0.12015567
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate   std.error statistic    p.value   conf.low  conf.high
## mean 0.07795076 0.020834282  3.766163 0.02775633 0.03709534 0.11880617
## sd   0.02637373 0.000592395  1.323556 0.09913529 0.02709396 0.02568587
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.1028759         0.1002257 0.94843600      39.58762 1.825308e-07
## sd       0.0300417         0.0297263 0.01560479      11.81480 7.034892e-06
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3227.62466 6473.24933 6525.16993   2121.69844     2358.000000
## sd   1.732262    39.29875   76.53274   71.18872     71.04863        1.732262
##      fit_nobs significant   positive     negative significant_positive
## mean     2366   0.9033203 0.99951172 0.0004882812            0.9033203
## sd          0   0.2955572 0.02209439 0.0220943887            0.2955572
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic   p.value    conf.low    conf.high
## mean -0.04774835 0.0211530060 -2.276129 0.1478689 -0.08922878 -0.006267932
## sd    0.02663904 0.0004849494  1.291895 0.2612650  0.02607382  0.027225747
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean    0.09947116        0.09681049 0.9502443      38.13183 2.304518e-12
## sd      0.02894177        0.02861880 0.0150099      11.11191 1.129695e-10
##        fit_df fit_logLik    fit_AIC  fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.1560 6482.31207 6534.233   2129.75070     2358.000000
## sd   1.732262    37.7909   73.49507   68.108     68.44729        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.6376953 0.05932617 0.9406738                    0
## sd          0   0.4807249 0.23626300 0.2362630                    0
##      significant_negative
## mean            0.6376953
## sd              0.4807249
plot_all_curves(lm_cases_0415, 'lm_cases_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_cases_0415_ctrl <- lm(case_rate_0415 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + case_rate_0415_lag, 
                             data = df_us_death_scaled)

summary(lm_cases_0415_ctrl)
## 
## Call:
## lm(formula = case_rate_0415 ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + case_rate_0415_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9724 -0.3185 -0.1425  0.0664 14.0442 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.002321   0.018957  -0.122 0.902545    
## airport_dist        0.039999   0.022414   1.785 0.074469 .  
## conservative       -0.192255   0.023487  -8.186 4.38e-16 ***
## male               -0.054249   0.020924  -2.593 0.009582 ** 
## age                -0.015480   0.020419  -0.758 0.448449    
## popdens             0.197538   0.021860   9.037  < 2e-16 ***
## manufact           -0.014696   0.023468  -0.626 0.531255    
## tourism            -0.015331   0.022028  -0.696 0.486509    
## academics          -0.148486   0.040571  -3.660 0.000258 ***
## medinc              0.242370   0.032107   7.549 6.24e-14 ***
## healthcare          0.034985   0.025290   1.383 0.166693    
## temp                0.155413   0.024405   6.368 2.29e-10 ***
## case_rate_0415_lag  0.187843   0.034440   5.454 5.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9219 on 2353 degrees of freedom
## Multiple R-squared:  0.1545, Adjusted R-squared:  0.1502 
## F-statistic: 35.83 on 12 and 2353 DF,  p-value: < 2.2e-16
lm_cases_0930 <- spec_calculate(df = df_us_death_scaled, 
               y = "case_rate_0930", 
               model = "lm", 
               controls = covariates %>% append('case_rate_0930_lag'),
               all.comb = T)

calc_all_summaries(lm_cases_0930)
## $pers_o
##         estimate   std.error statistic    p.value    conf.low   conf.high
## mean -0.08748817 0.020625280 -4.336426 0.06828373 -0.12793374 -0.04704261
## sd    0.04752857 0.001548285  2.489699 0.17839331  0.04636929  0.04884932
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.2576031         0.2554435 0.86051765     125.08352 1.479292e-08
## sd       0.1115422         0.1115596 0.06377168      61.40988 4.197068e-07
##        fit_df fit_logLik  fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2991.3823 6000.765 6052.6852    1755.7688     2358.000000
## sd   1.732262   173.8364  345.775  340.4391     263.7973        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.8127441 0.01391602 0.9860840                    0
## sd          0   0.3901644 0.11715678 0.1171568                    0
##      significant_negative
## mean            0.8127441
## sd              0.3901644
## 
## $pers_c
##        estimate    std.error statistic     p.value   conf.low conf.high
## mean 0.12308312 0.0184266866  6.583651 0.001060154 0.08694894 0.1592173
## sd   0.05683318 0.0009247264  2.811747 0.004025125 0.05550506 0.0581875
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.26752476        0.26538940 0.85530837     130.36518 3.170247e-32
## sd      0.09585838        0.09583076 0.05530759      53.15408 6.694621e-31
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2978.5255 5975.0511 6026.9717    1732.3039     2358.000000
## sd   1.732262   152.1392  302.3326  296.8751     226.7051        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##        estimate   std.error statistic    p.value   conf.low  conf.high
## mean 0.06069173 0.018035486 3.3280702 0.01604293 0.02532467 0.09605878
## sd   0.01982743 0.001245754 0.9305813 0.04801242 0.01817235 0.02163228
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean     0.2542249         0.2520547 0.8626706     122.18282 5.151662e-06
## sd       0.1072672         0.1072635 0.0612009      57.77144 9.231320e-05
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2997.8262 6013.6525 6065.5731    1763.7582     2358.000000
## sd   1.732262   166.5089  331.0699  325.5924     253.6869        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9135742        1        0            0.9135742
## sd          0   0.2810261        0        0            0.2810261
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate   std.error statistic      p.value   conf.low  conf.high
## mean 0.20856723 0.018548064 11.178947 1.455611e-10 0.17219502 0.24493944
## sd   0.06258162 0.000809816  3.068922 1.114284e-09 0.06164063 0.06354836
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.29257675        0.29051179 0.84079681     147.40723 4.922386e-58
## sd      0.08593704        0.08589113 0.05049305      51.53961 1.583845e-56
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2938.7056 5895.4112 5947.3318    1673.0560     2358.000000
## sd   1.732262   141.4434  280.9384  275.4853     203.2411        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate   std.error statistic      p.value    conf.low   conf.high
## mean -0.18517486 0.018911721 -9.704595 1.720658e-06 -0.22226019 -0.14808953
## sd    0.05329514 0.001037646  2.400281 2.810106e-05  0.05486468  0.05175801
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.28204011        0.27994829 0.84680190     139.91098 8.135163e-27
## sd      0.09375431        0.09371317 0.05458002      53.56275 2.424387e-25
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2954.9142 5927.8285 5979.7491    1697.9751     2358.000000
## sd   1.732262   151.5238  301.0411  295.4047     221.7289        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        0        1                    0
## sd          0           0        0        0                    0
##      significant_negative
## mean                    1
## sd                      0
plot_all_curves(lm_cases_0930, 'lm_cases_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_cases_0930_ctrl <- lm(case_rate_0930 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + case_rate_0930_lag, 
                             data = df_us_death_scaled)

summary(lm_cases_0930_ctrl)
## 
## Call:
## lm(formula = case_rate_0930 ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + case_rate_0930_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5977 -0.4787 -0.1204  0.3630  6.7757 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.001975   0.015604  -0.127  0.89929    
## airport_dist        0.119021   0.018459   6.448 1.37e-10 ***
## conservative       -0.214833   0.019372 -11.090  < 2e-16 ***
## male                0.152308   0.017222   8.844  < 2e-16 ***
## age                -0.293421   0.016809 -17.457  < 2e-16 ***
## popdens             0.009099   0.017999   0.506  0.61323    
## manufact            0.173597   0.019327   8.982  < 2e-16 ***
## tourism            -0.021363   0.018148  -1.177  0.23923    
## academics          -0.107078   0.033555  -3.191  0.00144 ** 
## medinc             -0.025005   0.026524  -0.943  0.34592    
## healthcare          0.066311   0.020818   3.185  0.00147 ** 
## temp                0.483630   0.021645  22.344  < 2e-16 ***
## case_rate_0930_lag  0.226178   0.028937   7.816 8.14e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7589 on 2353 degrees of freedom
## Multiple R-squared:  0.427,  Adjusted R-squared:  0.4241 
## F-statistic: 146.1 on 12 and 2353 DF,  p-value: < 2.2e-16

Predict Deaths

lm_deaths_0415 <- spec_calculate(df = df_us_death_scaled, 
               y = "death_rate_0415", 
               model = "lm", 
               controls = covariates %>% append('death_rate_0415_lag'),
               all.comb = T)

calc_all_summaries(lm_deaths_0415)
## $pers_o
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.02159386 0.0232443310 0.9352266 0.3927249 -0.02398759 0.06717531
## sd   0.02622739 0.0008315074 1.1494376 0.3084098  0.02646229 0.02609246
##      fit_r.squared fit_adj.r.squared   fit_sigma fit_statistic  fit_p.value
## mean    0.06227496        0.05950046 0.969742909     22.662019 8.001974e-08
## sd      0.01965118        0.01924889 0.009912406      6.298663 2.447263e-06
##        fit_df fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.3844 6578.76886 6630.68946   2217.71972     2358.000000
## sd   1.732262    24.7346   47.20338   41.60315     46.47504        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.1784668 0.7832031 0.2167969            0.1784668
## sd          0   0.3829520 0.4121134 0.4121134            0.3829520
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.04372995 0.0207956843 2.1162652 0.1315361 0.002950226 0.08450968
## sd   0.01888260 0.0004178388 0.9341132 0.2095286 0.019457170 0.01832666
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.06352029        0.06074950 0.96909709     23.161227 6.476435e-06
## sd      0.01994210        0.01954641 0.01006702      6.585468 2.596939e-04
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3278.80427 6575.60854 6627.52914   2214.77451     2358.000000
## sd   1.732262    25.10753   47.97863   42.46171     47.16306        1.732262
##      fit_nobs significant   positive    negative significant_positive
## mean     2366   0.6140137 0.99804688 0.001953125            0.6140137
## sd          0   0.4868868 0.04415638 0.044156385            0.4868868
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##          estimate    std.error  statistic   p.value     conf.low  conf.high
## mean -0.009297587 0.0202759542 -0.4597789 0.6280569 -0.049058136 0.03046296
## sd    0.007132299 0.0002130225  0.3530586 0.1908608  0.007010183 0.00727638
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean    0.06151788        0.05874158 0.9701292     22.284747 0.0002391457
## sd      0.02056854        0.02016790 0.0103773      6.638708 0.0091204303
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3281.31517 6580.63034 6632.55094   2219.51021     2358.000000
## sd   1.732262    25.84772   49.42726   43.77151     48.64461        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366           0 0.08740234 0.9125977                    0
## sd          0           0 0.28245823 0.2824582                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic    p.value   conf.low  conf.high
## mean 0.08059878 0.0212479128  3.816159 0.01274672 0.03893225 0.12226531
## sd   0.02281645 0.0005825912  1.139402 0.04100308 0.02361434 0.02204891
##      fit_r.squared fit_adj.r.squared   fit_sigma fit_statistic  fit_p.value
## mean    0.06766920        0.06491029 0.966950106     24.935697 1.104777e-08
## sd      0.01948835        0.01910418 0.009860482      6.747553 4.400887e-07
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3273.56134 6565.12267 6617.04328   2204.96235     2358.000000
## sd   1.732262    24.64179   47.10499   41.80496     46.08995        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9294434        1        0            0.9294434
## sd          0   0.2561141        0        0            0.2561141
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error  statistic   p.value    conf.low  conf.high
## mean -0.01275703 0.0215914703 -0.6063945 0.3820066 -0.05509727 0.02958321
## sd    0.02276319 0.0004845586  1.0615056 0.2740656  0.02216699 0.02338281
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.06198511        0.05920994 0.96988961     22.517221 1.683153e-06
## sd      0.02023050        0.01983336 0.01020926      6.553321 3.977900e-05
##        fit_df  fit_logLik    fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.73490 6579.46980 6631.3904   2218.40522     2358.000000
## sd   1.732262    25.44329   48.63943   43.0717     47.84512        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.06982422 0.2592773 0.7407227                    0
## sd          0  0.25488165 0.4382916 0.4382916                    0
##      significant_negative
## mean           0.06982422
## sd             0.25488165
plot_all_curves(lm_deaths_0415, 'lm_deaths_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_deaths_0415_ctrl <- lm(death_rate_0415 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + death_rate_0415_lag, 
                             data = df_us_death_scaled)

summary(lm_deaths_0415_ctrl)
## 
## Call:
## lm(formula = death_rate_0415 ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + death_rate_0415_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9826 -0.3573 -0.1845  0.0510 14.3516 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.002784   0.019506  -0.143  0.88650    
## airport_dist         0.030827   0.023066   1.336  0.18153    
## conservative        -0.164472   0.024153  -6.809 1.24e-11 ***
## male                -0.058699   0.021529  -2.727  0.00645 ** 
## age                  0.005348   0.021010   0.255  0.79911    
## popdens              0.149665   0.022492   6.654 3.53e-11 ***
## manufact             0.001234   0.024105   0.051  0.95919    
## tourism             -0.038348   0.022667  -1.692  0.09082 .  
## academics           -0.118379   0.041756  -2.835  0.00462 ** 
## medinc               0.167830   0.033048   5.078 4.11e-07 ***
## healthcare           0.023905   0.026021   0.919  0.35835    
## temp                 0.152491   0.025113   6.072 1.47e-09 ***
## death_rate_0415_lag  0.227223   0.034577   6.572 6.11e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9486 on 2353 degrees of freedom
## Multiple R-squared:  0.1048, Adjusted R-squared:  0.1002 
## F-statistic: 22.96 on 12 and 2353 DF,  p-value: < 2.2e-16
lm_deaths_0930 <- spec_calculate(df = df_us_death_scaled, 
               y = "death_rate_0930", 
               model = "lm", 
               controls = covariates %>% append('death_rate_0930_lag'),
               all.comb = T)

calc_all_summaries(lm_deaths_0930)
## $pers_o
##         estimate   std.error statistic   p.value    conf.low    conf.high
## mean -0.04771222 0.021786652 -2.278399 0.2033106 -0.09043520 -0.004989232
## sd    0.04891033 0.001143027  2.365694 0.2823974  0.04739848  0.050476466
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17340678        0.17098490 0.90938022      73.76279 0.0003284423
## sd      0.08211462        0.08202695 0.04520183      38.89783 0.0127438710
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.5125 6269.0249 6320.9455    1954.8930     2358.000000
## sd   1.732262   118.5453  235.2744  230.2473     194.2011        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.5175781 0.1660156 0.8339844           0.01733398
## sd          0   0.4997519 0.3721401 0.3721401           0.13052845
##      significant_negative
## mean            0.5002441
## sd              0.5000610
## 
## $pers_c
##       estimate   std.error statistic     p.value   conf.low  conf.high
## mean 0.1268692 0.019400070  6.486926 0.001113984 0.08882628 0.16491221
## sd   0.0530807 0.000431052  2.614407 0.003196235 0.05230910 0.05385451
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.18772875        0.18534406 0.90182774      81.06360 3.020223e-31
## sd      0.06656885        0.06643439 0.03691875      31.69162 9.761146e-30
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3106.72700 6231.4540 6283.3746    1921.0215     2358.000000
## sd   1.732262    97.68093  193.4857  188.3267     157.4353        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##         estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.006814176 0.0190529070 0.3370688 0.5684355 -0.03054801 0.04417636
## sd   0.014399735 0.0008842485 0.7376599 0.2709864  0.01342749 0.01550550
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17020480        0.16777296 0.91123005      71.84871 0.0002407612
## sd      0.07920734        0.07910241 0.04344308      36.74472 0.0097444833
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3130.5609 6279.1219 6331.0425    1962.4657     2358.000000
## sd   1.732262   113.5638  225.2666  220.1163     187.3254        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.01293945 0.6718750 0.3281250           0.01293945
## sd          0  0.11302718 0.4695879 0.4695879           0.11302718
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic      p.value   conf.low  conf.high
## mean 0.20456071 0.0195666193 10.433477 1.450827e-09 0.16619114 0.24293027
## sd   0.05862264 0.0002714668  2.928455 1.173231e-08 0.05832843 0.05892019
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.21065181        0.20833117 0.88918037      93.99839 4.111013e-56
## sd      0.05703859        0.05688833 0.03205223      29.92537 1.256598e-54
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3073.75974 6165.5195 6217.4401    1866.8085     2358.000000
## sd   1.732262    86.01007  170.1551  165.0649     134.8963        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic    p.value    conf.low   conf.high
## mean -0.09751528 0.0201788237 -4.781985 0.06488922 -0.13708536 -0.05794521
## sd    0.04905087 0.0007283533  2.339181 0.18515988  0.04998104  0.04814510
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.18025226        0.17784851 0.90582179      76.95645 5.561893e-13
## sd      0.07348383        0.07335924 0.04048169      34.39706 2.643324e-11
##        fit_df fit_logLik   fit_AIC  fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3116.8080 6251.6161 6303.537    1938.7034     2358.000000
## sd   1.732262   106.3505  210.8025  205.555     173.7892        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.8454590 0.01293945 0.9870605                    0
## sd          0   0.3615107 0.11302718 0.1130272                    0
##      significant_negative
## mean            0.8454590
## sd              0.3615107
plot_all_curves(lm_deaths_0930, 'lm_deaths_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_deaths_0930_ctrl <- lm(death_rate_0930 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + death_rate_0930_lag, 
                             data = df_us_death_scaled)

summary(lm_deaths_0930_ctrl)
## 
## Call:
## lm(formula = death_rate_0930 ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + death_rate_0930_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9380 -0.5061 -0.1812  0.2849  5.7208 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.001376   0.017228  -0.080  0.93636    
## airport_dist         0.094936   0.020382   4.658 3.37e-06 ***
## conservative        -0.326855   0.021335 -15.320  < 2e-16 ***
## male                -0.078391   0.019013  -4.123 3.87e-05 ***
## age                 -0.056769   0.018559  -3.059  0.00225 ** 
## popdens              0.087295   0.019869   4.394 1.16e-05 ***
## manufact             0.036678   0.021292   1.723  0.08509 .  
## tourism             -0.068205   0.020024  -3.406  0.00067 ***
## academics           -0.248127   0.036920  -6.721 2.26e-11 ***
## medinc               0.068018   0.029185   2.331  0.01986 *  
## healthcare           0.039224   0.022986   1.706  0.08805 .  
## temp                 0.453554   0.023106  19.630  < 2e-16 ***
## death_rate_0930_lag  0.171593   0.031177   5.504 4.12e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8379 on 2353 degrees of freedom
## Multiple R-squared:  0.3015, Adjusted R-squared:  0.2979 
## F-statistic: 84.64 on 12 and 2353 DF,  p-value: < 2.2e-16

Predict Socdist Onset

cox_onset_socdist <- spec_calculate(df = df_us_socdist_scaled, 
               y = "Surv(cpt_day_socdist_2, event)", 
               model = "cox_model", 
               controls = covariates %>% 
                 append(c('cpt_day_socdist_2_lag', 'onset_prev', 'slope_prev')),
               all.comb = T)

cox_onset_socdist <- cox_onset_socdist %>% map(filter_socdist)

cox_onset_socdist_hr <- cox_onset_socdist %>% map(coef_to_hr)

calc_all_summaries(cox_onset_socdist_hr)
## $pers_o
##        estimate    std.error statistic     p.value   conf.low  conf.high fit_n
## mean 0.90084718 0.0251620611 -4.186946 0.001514851 0.85747908 0.94641117  2366
## sd   0.02327209 0.0008332922  1.134641 0.004378419 0.02144995 0.02529138     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         234.86781    5.412147e-22        224.69959
## sd            0          66.65304    5.712933e-21         65.57137
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   1.657834e-20          223.08702      3.23802e-20                   NA
## sd     1.946635e-19           64.68873      4.20639e-19                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.09414009         0.9999987      0.63791890
## sd                   NA    0.02557470         0.0000000      0.01724889
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0071943487 -15894.97671 31807.95342 31859.87022     2366
## sd                0.0002517063     33.32652    64.80125    60.26247        0
##      significant positive negative significant_positive significant_negative
## mean  0.99926758        0        1                    0           0.99926758
## sd    0.02705668        0        0                    0           0.02705668
## 
## $pers_c
##       estimate    std.error statistic    p.value  conf.low  conf.high fit_n
## mean 0.9305632 0.0212930328 -3.439328 0.02976033 0.8924980 0.97025352  2366
## sd   0.0271874 0.0006426558  1.464538 0.04948005 0.0250966 0.02942282     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         230.04233    9.389246e-18        220.31793
## sd            0          65.03569    8.934878e-17         63.96218
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   9.055267e-17           218.1110     1.179495e-16                   NA
## sd     8.321553e-16            62.7894     1.056573e-15                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.09230748         0.9999987      0.63684492
## sd                   NA    0.02505988         0.0000000      0.01686053
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072149160 -15897.38945 31812.77891 31864.69571     2366
## sd                0.0002147792     32.51784    63.01764    57.96853        0
##      significant positive negative significant_positive significant_negative
## mean   0.7814941        0        1                    0            0.7814941
## sd     0.4132829        0        0                    0            0.4132829
## 
## $pers_e
##        estimate    std.error statistic   p.value   conf.low conf.high fit_n
## mean 1.02426578 0.0213783358 1.1179169 0.3310683 0.98223389 1.0680964  2366
## sd   0.01204374 0.0001072928 0.5488602 0.2332412 0.01152189 0.0125931     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         217.59045    4.479032e-13        207.16637
## sd            0          74.34435    3.637662e-12         73.62063
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   4.870524e-12          205.28108     6.898468e-12                   NA
## sd     3.612500e-11           72.55058     4.849513e-11                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.08741196         0.9999987      0.63445954
## sd                   NA    0.02878782         0.0000000      0.01893728
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072780187 -15903.61539 31825.23079 31877.14759     2366
## sd                0.0002501295     37.17218    72.42525    67.58321        0
##      significant positive negative significant_positive significant_negative
## mean  0.03979492        1        0           0.03979492                    0
## sd    0.19550094        0        0           0.19550094                    0
## 
## $pers_a
##        estimate    std.error statistic   p.value   conf.low  conf.high fit_n
## mean 1.00925711 0.0215170876 0.3206578 0.1308660 0.96752121 1.05279642  2366
## sd   0.04240161 0.0008720833 1.9443295 0.1687364 0.03927003 0.04575063     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         219.91522    3.098970e-13        209.08456
## sd            0          74.34997    2.452981e-12         72.81736
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   3.048073e-12          207.31414     4.251963e-12                   NA
## sd     2.230823e-11           71.92873     2.986865e-11                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.08830803         0.9999987      0.63489406
## sd                   NA    0.02877147         0.0000000      0.01796452
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072595031 -15902.45301 31822.90601 31874.82281     2366
## sd                0.0002430652     37.17499    72.45016    67.66904        0
##      significant  positive  negative significant_positive significant_negative
## mean   0.4101562 0.5031738 0.4968262            0.2971191            0.1130371
## sd     0.4919219 0.5000510 0.5000510            0.4570452            0.3166768
## 
## $pers_n
##        estimate    std.error statistic    p.value   conf.low  conf.high fit_n
## mean 1.07129629 0.0227821273  3.028251 0.05228809 1.02453397 1.12019435  2366
## sd   0.02975165 0.0005711819  1.264375 0.10005770 0.02922361 0.03029421     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         226.77298    2.340417e-18        216.21052
## sd            0          66.03596    2.269295e-17         64.65633
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   1.974366e-17          213.98344     2.566388e-17                   NA
## sd     1.664079e-16           63.52385     2.065698e-16                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.09104196         0.9999987      0.63592775
## sd                   NA    0.02545169         0.0000000      0.01772905
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072763743 -15899.02413 31816.04825 31867.96505     2366
## sd                0.0002555284     33.01798    64.09478    59.27922        0
##      significant positive negative significant_positive significant_negative
## mean   0.7404785        1        0            0.7404785                    0
## sd     0.4384256        0        0            0.4384256                    0
plot_all_curves(cox_onset_socdist_hr, 'cox_onset_socdist_hr', hr = T)
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

cox_onset_socdist_ctrl <- coxph(Surv(cpt_day_socdist_2, event) ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + onset_prev + slope_prev +
                                 cpt_day_socdist_2_lag, 
                             data = df_us_socdist_scaled)

summary(cox_onset_socdist_ctrl)
## Call:
## coxph(formula = Surv(cpt_day_socdist_2, event) ~ airport_dist + 
##     conservative + male + age + popdens + manufact + tourism + 
##     academics + medinc + healthcare + temp + onset_prev + slope_prev + 
##     cpt_day_socdist_2_lag, data = df_us_socdist_scaled)
## 
##   n= 2366, number of events= 2365 
## 
##                           coef exp(coef) se(coef)       z Pr(>|z|)    
## airport_dist          -0.12768   0.88013  0.02573  -4.962 6.98e-07 ***
## conservative           0.09538   1.10007  0.02666   3.577 0.000347 ***
## male                  -0.05323   0.94816  0.02335  -2.279 0.022655 *  
## age                   -0.01756   0.98259  0.02205  -0.797 0.425615    
## popdens               -0.07439   0.92831  0.02794  -2.663 0.007750 ** 
## manufact               0.04968   1.05093  0.02457   2.022 0.043180 *  
## tourism               -0.11767   0.88899  0.02528  -4.655 3.24e-06 ***
## academics              0.02042   1.02063  0.04352   0.469 0.639026    
## medinc                -0.07733   0.92558  0.03477  -2.224 0.026136 *  
## healthcare             0.03931   1.04009  0.02802   1.403 0.160717    
## temp                  -0.29100   0.74752  0.02704 -10.761  < 2e-16 ***
## onset_prev            -0.03523   0.96539  0.03418  -1.031 0.302745    
## slope_prev            -0.11930   0.88754  0.03333  -3.580 0.000344 ***
## cpt_day_socdist_2_lag -0.23676   0.78918  0.04751  -4.983 6.25e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## airport_dist             0.8801     1.1362    0.8368    0.9257
## conservative             1.1001     0.9090    1.0441    1.1591
## male                     0.9482     1.0547    0.9057    0.9926
## age                      0.9826     1.0177    0.9410    1.0260
## popdens                  0.9283     1.0772    0.8789    0.9806
## manufact                 1.0509     0.9515    1.0015    1.1028
## tourism                  0.8890     1.1249    0.8460    0.9341
## academics                1.0206     0.9798    0.9372    1.1115
## medinc                   0.9256     1.0804    0.8646    0.9909
## healthcare               1.0401     0.9615    0.9845    1.0988
## temp                     0.7475     1.3378    0.7089    0.7882
## onset_prev               0.9654     1.0359    0.9028    1.0323
## slope_prev               0.8875     1.1267    0.8314    0.9474
## cpt_day_socdist_2_lag    0.7892     1.2671    0.7190    0.8662
## 
## Concordance= 0.661  (se = 0.007 )
## Likelihood ratio test= 344.5  on 14 df,   p=<2e-16
## Wald test            = 320.8  on 14 df,   p=<2e-16
## Score (logrank) test = 325.7  on 14 df,   p=<2e-16

Predict Socdist Mean

lm_mean_socdist <- spec_calculate(df = df_us_socdist_scaled, 
               y = "mean_diff_socdist_2", 
               model = "lm", 
               controls = covariates %>% 
                 append(c('mean_diff_socdist_2_lag', 'onset_prev', 'slope_prev')), 
               all.comb = T)

lm_mean_socdist <- lm_mean_socdist %>% map(filter_socdist)

calc_all_summaries(lm_mean_socdist)
## $pers_o
##        estimate    std.error statistic      p.value   conf.low  conf.high
## mean 0.14021047 0.0194929117  7.205979 8.705399e-07 0.10198542 0.17843551
## sd   0.03452777 0.0006951486  1.812431 7.476719e-06 0.03461717 0.03449206
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic   fit_p.value
## mean    0.35328573        0.35083507 0.80530040     146.96585 1.503377e-124
## sd      0.04184288        0.04168147 0.02561953      24.32935 4.585680e-123
##        fit_df fit_logLik  fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2838.6814 5699.363 5762.8213   1529.47925     2356.000000
## sd   1.732262    75.1761  148.104  141.8935     98.95842        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##         estimate    std.error statistic      p.value    conf.low   conf.high
## mean -0.09138612 0.0175028708 -5.202841 0.0001219264 -0.12570875 -0.05706349
## sd    0.02382600 0.0005762093  1.266658 0.0005332005  0.02445777  0.02323204
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.3459287        0.34345301 0.80980432     141.97404 1.794927e-99
## sd       0.0455359        0.04536277 0.02764305      22.82046 5.855583e-98
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2851.70314 5725.4063 5788.8648    1546.8787     2356.000000
## sd   1.732262    80.44351  158.4916  151.8134     107.6924        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        0        1                    0
## sd          0           0        0        0                    0
##      significant_negative
## mean                    1
## sd                      0
## 
## $pers_e
##          estimate    std.error  statistic   p.value    conf.low  conf.high
## mean -0.005150281 0.0170325770 -0.3254585 0.3943169 -0.03855068 0.02825012
## sd    0.018590856 0.0005492686  1.0726356 0.2595924  0.01788979 0.01932655
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.33818294        0.33567872 0.81453789     137.17003 1.467243e-86
## sd      0.04833435        0.04817127 0.02914621      23.33049 5.538707e-85
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2865.36119 5752.7224 5816.1809    1565.1974     2356.000000
## sd   1.732262    84.18746  166.0151  159.4343     114.3107        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.04589844 0.2880859 0.7119141           0.03637695
## sd          0  0.20929038 0.4529266 0.4529266           0.18724911
##      significant_negative
## mean          0.009521484
## sd            0.097124295
## 
## $pers_a
##         estimate    std.error statistic    p.value    conf.low   conf.high
## mean -0.07066438 0.0179947707 -3.905691 0.02573413 -0.10595161 -0.03537715
## sd    0.03237484 0.0005958765  1.737321 0.07033431  0.03290196  0.03188185
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.34307315        0.34058648 0.81157397     140.18752 5.652261e-98
## sd      0.04555493        0.04538166 0.02759323      22.68894 2.193977e-96
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2856.87873 5735.7575 5799.2160    1553.6320     2356.000000
## sd   1.732262    80.11993  157.8544  151.2078     107.7374        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.8581543        0        1                    0
## sd          0   0.3489344        0        0                    0
##      significant_negative
## mean            0.8581543
## sd              0.3489344
## 
## $pers_n
##        estimate    std.error statistic   p.value   conf.low  conf.high
## mean 0.05788521 0.0182294883  3.197587 0.0937796 0.02213770 0.09363271
## sd   0.03644144 0.0005360911  2.007429 0.2089676 0.03681758 0.03609200
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.3416623        0.33917212 0.81234918     139.36614 2.498468e-86
## sd       0.0500918        0.04993868 0.03028064      24.26272 9.696779e-85
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2858.86918 5739.7384 5803.1969    1556.9687     2356.000000
## sd   1.732262    87.63406  172.9206  166.3678     118.4671        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.7509766 0.8999023 0.1000977            0.7460938
## sd          0   0.4325002 0.3001668 0.3001668            0.4352977
##      significant_negative
## mean          0.004882812
## sd            0.069714828
plot_all_curves(lm_mean_socdist, 'lm_mean_socdist')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_mean_socdist_ctrl <- lm(mean_diff_socdist_2 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + onset_prev + slope_prev +
                                 mean_diff_socdist_2_lag,
                             data = df_us_socdist_scaled)

summary(lm_mean_socdist_ctrl)
## 
## Call:
## lm(formula = mean_diff_socdist_2 ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + onset_prev + slope_prev + mean_diff_socdist_2_lag, 
##     data = df_us_socdist_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6309 -0.2614  0.0611  0.3676  2.6202 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.0023280  0.0157422   0.148  0.88245    
## airport_dist            -0.0964326  0.0186435  -5.172 2.51e-07 ***
## conservative            -0.1728838  0.0201691  -8.572  < 2e-16 ***
## male                    -0.0058333  0.0175253  -0.333  0.73928    
## age                      0.0833188  0.0171674   4.853 1.29e-06 ***
## popdens                  0.1215938  0.0184869   6.577 5.88e-11 ***
## manufact                -0.0236647  0.0195175  -1.212  0.22545    
## tourism                  0.0744721  0.0184003   4.047 5.35e-05 ***
## academics                0.0745873  0.0341493   2.184  0.02905 *  
## medinc                   0.2866240  0.0272018  10.537  < 2e-16 ***
## healthcare               0.0008116  0.0210337   0.039  0.96922    
## temp                     0.0572502  0.0208304   2.748  0.00603 ** 
## onset_prev              -0.0587622  0.0252012  -2.332  0.01980 *  
## slope_prev               0.0714963  0.0237801   3.007  0.00267 ** 
## mean_diff_socdist_2_lag  0.3108043  0.0273882  11.348  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7657 on 2351 degrees of freedom
## Multiple R-squared:  0.4172, Adjusted R-squared:  0.4138 
## F-statistic: 120.2 on 14 and 2351 DF,  p-value: < 2.2e-16

Save all results

results_us <- list(cox_onset_prev_hr, lm_slope_prev, lm_slope_prev_var, 
                   lm_cases_0415, lm_cases_0930, lm_deaths_0415, lm_deaths_0930, 
                   cox_onset_socdist_hr, lm_mean_socdist)

names(results_us) <- list('cox_onset_prev_hr', 'lm_slope_prev', 'lm_slope_prev_var', 
                   'lm_cases_0415', 'lm_cases_0930', 'lm_deaths_0415', 'lm_deaths_0930', 
                   'cox_onset_socdist_hr', 'lm_mean_socdist')

save(results_us, file="US_spec_results.RData")

Descriptives

Distributions

death_temp <- df_us_death_unscaled %>% select(county_fips:death_rate_0930)
onset_temp <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)

df_us_desc <- df_us_socdist_unscaled %>% 
  plyr::join(death_temp, by="county_fips") %>%
  dplyr::select(pers_o, pers_c, pers_e, pers_a, pers_n, 
                age, male, conservative, 
                academics, medinc, manufact, 
                airport_dist, tourism, healthcare, popdens,
                temp, onset_prev, slope_prev, 
                case_rate_0415, case_rate_0930, death_rate_0415, death_rate_0930,
                cpt_day_socdist_2, mean_diff_socdist_2)
          

df_us_desc %>% select(age) %>% summary()
##       age       
##  Min.   :22.90  
##  1st Qu.:37.70  
##  Median :40.70  
##  Mean   :40.59  
##  3rd Qu.:43.40  
##  Max.   :67.00
us_means <- df_us_desc %>% summarise_all(mean)
us_sd <- df_us_desc %>% summarise_all(sd)

desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame() 
names(desc_us) <- c('mean', 'sd')
desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us

Explore correlations

a <- df_us_socdist_unscaled %>% select(county_fips, pers_o:pers_n, cpt_day_socdist, mean_diff_socdist)
b <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)
c <- df_us_death_unscaled %>% select(county_fips, case_rate_0415:death_rate_0930)

us_joined <- plyr::join_all(list(a, b, c), by='county_fips')

us_joined %>% select(-county_fips) %>% cor(use = 'pairwise.complete')
##                        pers_o      pers_a      pers_e      pers_c      pers_n
## pers_o             1.00000000 -0.15281714 -0.08497946 -0.04802258 -0.22873070
## pers_a            -0.15281714  1.00000000  0.24480399  0.65337156 -0.39348271
## pers_e            -0.08497946  0.24480399  1.00000000  0.15985220 -0.39790623
## pers_c            -0.04802258  0.65337156  0.15985220  1.00000000 -0.40801964
## pers_n            -0.22873070 -0.39348271 -0.39790623 -0.40801964  1.00000000
## cpt_day_socdist    0.13992321  0.08453292  0.02403108  0.09085251 -0.12512306
## mean_diff_socdist  0.33391055 -0.23412700 -0.01723667 -0.21269607  0.06173468
## onset_prev        -0.29216003 -0.09682226 -0.07478377 -0.09611230  0.26005706
## slope_prev         0.19638054  0.12336567  0.06290779  0.09638924 -0.18203280
## case_rate_0415     0.14548085  0.07765531  0.05022219  0.04619685 -0.12545916
## death_rate_0415    0.08588663  0.09385457  0.02045067  0.05738798 -0.06854497
## case_rate_0930    -0.09448026  0.32267793  0.07255061  0.23696948 -0.20943857
## death_rate_0930   -0.01762812  0.31188291  0.02183955  0.22741935 -0.12688753
##                   cpt_day_socdist mean_diff_socdist  onset_prev  slope_prev
## pers_o                 0.13992321        0.33391055 -0.29216003  0.19638054
## pers_a                 0.08453292       -0.23412700 -0.09682226  0.12336567
## pers_e                 0.02403108       -0.01723667 -0.07478377  0.06290779
## pers_c                 0.09085251       -0.21269607 -0.09611230  0.09638924
## pers_n                -0.12512306        0.06173468  0.26005706 -0.18203280
## cpt_day_socdist        1.00000000        0.15266953 -0.14248381  0.19833550
## mean_diff_socdist      0.15266953        1.00000000 -0.26415088  0.27105720
## onset_prev            -0.14248381       -0.26415088  1.00000000 -0.72372322
## slope_prev             0.19833550        0.27105720 -0.72372322  1.00000000
## case_rate_0415         0.15305730        0.24381339 -0.47200894  0.86665276
## death_rate_0415        0.12568626        0.19568034 -0.39929052  0.75658883
## case_rate_0930         0.14915336       -0.40302394 -0.14180907  0.22824887
## death_rate_0930        0.20710211       -0.09734626 -0.27900660  0.51420879
##                   case_rate_0415 death_rate_0415 case_rate_0930 death_rate_0930
## pers_o                0.14548085      0.08588663    -0.09448026     -0.01762812
## pers_a                0.07765531      0.09385457     0.32267793      0.31188291
## pers_e                0.05022219      0.02045067     0.07255061      0.02183955
## pers_c                0.04619685      0.05738798     0.23696948      0.22741935
## pers_n               -0.12545916     -0.06854497    -0.20943857     -0.12688753
## cpt_day_socdist       0.15305730      0.12568626     0.14915336      0.20710211
## mean_diff_socdist     0.24381339      0.19568034    -0.40302394     -0.09734626
## onset_prev           -0.47200894     -0.39929052    -0.14180907     -0.27900660
## slope_prev            0.86665276      0.75658883     0.22824887      0.51420879
## case_rate_0415        1.00000000      0.80803632     0.19781085      0.46226490
## death_rate_0415       0.80803632      1.00000000     0.14786594      0.48273071
## case_rate_0930        0.19781085      0.14786594     1.00000000      0.59222976
## death_rate_0930       0.46226490      0.48273071     0.59222976      1.00000000
us_means <- us_joined %>% select(-county_fips) %>% summarise_all(mean)
us_sd <- us_joined %>% select(-county_fips) %>% summarise_all(sd)

desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame() %>%
  rename(mean=V1, sd=V2)

desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us  

Visualization

Example Plots

# get county names
counties <- read_csv('US_personality_2.csv') %>%
  select(county, county_name) %>%
  dplyr::rename(county_fips = county) %>%
  mutate(county_fips = as.character(county_fips)) %>%
  distinct()

df_us_prev <- read_csv('US_prevalence.csv')

df_us_prev <- df_us_prev %>% 
  select(fips, date, rate) %>% 
  mutate(date = as.Date(date, "%d%b%Y")) %>% 
  dplyr::rename(county_fips = fips, 
         rate_day = rate) %>%
  mutate(county_fips = as.character(county_fips)) %>% 
  filter(rate_day >= 0 & rate_day <= 1000)

# merge with processed data frames
df_us_prev <- df_us_prev %>% inner_join(counties, by = 'county_fips')
df_us_socdist <- df_us_socdist %>% inner_join(counties, by = 'county_fips')
gg_prev <- df_us_prev %>% 
  filter(county_fips == '8037' | county_fips == '19079') %>%
  filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
  mutate(window = ifelse(date >= '2020-03-15' & date <= '2020-04-15', 
                         'in window', 'out of window')) %>%
  ggplot(aes(x=date, y=rate_day)) + 
  geom_point(aes(col=county_name)) + 
  geom_line(aes(col=county_name)) + 
  theme_light() +
  theme(legend.position="bottom", legend.margin=margin(t=-20)) +
  guides(color=guide_legend(title="")) +
  annotate('rect', xmin = as.Date('2020-03-15'), 
           xmax = as.Date('2020-04-15'), 
           ymin = -Inf, ymax = Inf, alpha = 0.07) +
  geom_segment(aes(x = as.Date('2020-04-07'), y = 0, 
                   xend = as.Date('2020-04-07'), yend = 1), 
               colour = "#00BFC4") +
  geom_segment(aes(x = as.Date('2020-03-09'), y = 0, 
                   xend = as.Date('2020-03-09'), yend = 1), 
               colour = "#F8766D") +
  annotate("text", x = as.Date('2020-04-07'), 
           y = 2, label = "Onset \n Hamilton County", color = "#00BFC4", size = 3) +
  annotate("text", x = as.Date('2020-03-09'), 
           y = 2, label = "Onset \n Eagle County", color = "#F8766D", size = 3) +
  annotate("text", x = as.Date('2020-03-31'), 
           y = 9.5, label = "Time window used for\nanalysis of growth rates", 
           color = "grey50", size = 3) +
  ylab('Prevalence') +
  xlab('') +
  ylim(c(-1,10)) +
  ggtitle("COVID-19 - comparison of onsets and \n growth rates in two counties") +
  theme(plot.title = element_text(size=11)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())

gg_prev

gg_socdist <- df_us_socdist %>% 
  filter(county_fips == 19137 | county_fips == 36081) %>%
  filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
  ggplot(aes(x=date, y=socdist_tiles)) + 
  #facet_wrap(~countyname) +
  geom_point(aes(col=county_name)) + 
  geom_line(aes(col=county_name)) + 
  theme_light() +
  theme(legend.position="bottom",legend.margin=margin(t=-20)) +
  guides(color=guide_legend(title="")) +
    geom_segment(aes(x = as.Date('2020-03-23'), y = -.6, 
                   xend = as.Date('2020-03-23'), yend = 0), 
               colour = "#00BFC4") +
  geom_segment(aes(x = as.Date('2020-03-23'), y = -.6, 
                   xend = as.Date('2020-04-28'), yend = -.6), 
               colour = "#00BFC4") +
  geom_segment(aes(x = as.Date('2020-03-14'), y = -.156, 
                   xend = as.Date('2020-03-14'), yend = .17), 
               colour = "#F8766D") +
  geom_segment(aes(x = as.Date('2020-03-14'), y = -.156, 
                   xend = as.Date('2020-04-28'), yend = -.156), 
               colour = "#F8766D") +
  annotate("text", x = as.Date('2020-03-26'), 
           y = .08, label = "Change point\nQueens County", color = "#00BFC4", size = 3) +
  annotate("text", x = as.Date('2020-03-16'), 
           y = .25, label = "Change point\nMontgommery County", color = "#F8766D", size = 3) +
  ylab('Social distancing') +
  xlab("") +
  ylim(c(-.7,.3)) +
  ggtitle("Social distancing - comparison of change \n points and means in two counties") + 
  theme(plot.title = element_text(size=11)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())

gg_socdist

figure <- ggarrange(gg_prev, gg_socdist,
                    labels = c("A", "B"),
                    ncol = 2, nrow = 1)
figure

# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
                          max(df_us_prev$date), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

tto <- df_us_prev_unscaled %>%
  merge(df_dates, by.x = 'onset_prev', by.y = 'time') %>%
  filter(event==1) %>%
  ggplot(aes(x=date+1)) +
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - COVID-19 Onset') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Date of March')
  
soa <- df_us_slope_prev %>% 
  ggplot(aes(x=slope_prev)) +
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - COVID-19 Growth Rates') +
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Standardized growth rates')


figure <- ggarrange(tto, soa,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
figure

tto <- df_us_death_unscaled %>% 
  ggplot(aes(x=case_rate_0930)) + 
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Cumulative Case Rates') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Cases Per 1000 Inhabitants')
  
soa <- df_us_death_unscaled %>%
  ggplot(aes(x=death_rate_0930)) +
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Cumulative Death Rates') +
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Deaths Per 1000 Inhabitants')


figure <- ggarrange(tto, soa,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
figure

# create sequence of dates
date_sequence <- seq.Date(min(df_us_socdist$date),
                          max(df_us_socdist$date), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

tto <- df_us_cpt_socdist %>% 
  merge(df_dates, by.x = 'cpt_day_socdist_2', by.y = 'time') %>%
  filter(cpt_day_socdist_2 < 30) %>% 
  ggplot(aes(x=date+1)) + 
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Time to Adoption') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Date') +
  xlim(c(as.Date('2020-03-01'), as.Date('2020-03-31')))

soa <- df_us_cpt_socdist %>% 
  ggplot(aes(x=mean_diff_socdist_2)) + 
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Strength of Adjustment') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Standardized mean difference') 


figure <- ggarrange(tto, soa,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
figure

Robustness check FB mobility data

df_us_google <- read_csv("../Google/2020_US_Region_Mobility_Report.csv")

df_us_fb <- df_us_socdist %>%
  select(county_fips, date, socdist_tiles, socdist_single_tile)

df_us_ggl <- df_us_google %>% 
  select(census_fips_code, date,
         retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline) %>%
  drop_na() %>%
  mutate(county_fips = as.character(as.numeric(census_fips_code))) %>%
  select(-census_fips_code)

df_us_fb_ggl <- df_us_fb %>% plyr::join(df_us_ggl, c('county_fips', 'date')) %>% drop_na()

df_us_fb_ggl %>% split(.$county_fips) %>%
  map(~ map_if(., is.numeric, scale)) %>%
  bind_rows() %>%
  select(socdist_tiles:residential_percent_change_from_baseline) %>%
  drop_na() %>%
  cor() %>% 
  as.data.frame() %>%
  select(c(1,2)) %>% round(3) %>% write.csv()
## "","socdist_tiles","socdist_single_tile"
## "socdist_tiles",1,-0.971
## "socdist_single_tile",-0.971,1
## "retail_and_recreation_percent_change_from_baseline",0.945,-0.939
## "grocery_and_pharmacy_percent_change_from_baseline",0.766,-0.795
## "parks_percent_change_from_baseline",0.44,-0.442
## "transit_stations_percent_change_from_baseline",0.889,-0.888
## "workplaces_percent_change_from_baseline",0.919,-0.925
## "residential_percent_change_from_baseline",-0.906,0.897

Visualize distribution of effect sizes

load("US_spec_results.RData")
load("../GER/GER_spec_results.RData")
plot_dist <- function(df, filename){
  
  w <- 5
  h <- 5
  
  file_path_eps <- paste0("../../Plots/COMP/", filename,".eps")
  file_path_pdf <- paste0("../../Plots/COMP/", filename,".pdf")
  file_path_png <- paste0("../../Plots/COMP/", filename,".png")
  
    
      ggplot(df, aes(x=estimate, fill=country), alpha=.3) + 
        geom_histogram(alpha=.5, position="identity") +
      #coord_fixed(ratio = 0.1) +
        theme_bw() +
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black")) +
        theme(axis.title.x=element_blank(),
              axis.text.y=element_blank(),
              axis.title.y=element_blank()) +
        theme(legend.position="none") +
      ggsave(file=file_path_eps, device = 'eps', width=w, height=h)+
      ggsave(file=file_path_pdf, device = 'pdf', width=w, height=h)+
      ggsave(file=file_path_png, device = 'png', width=w, height=h)
    
    
}

# define function to plot all curves in list
plot_all_dist <- function(ls, analysis, hr=F){
  
  pers <- c('o', 'c', 'e', 'a', 'n')
  filenames <- as.list(paste0(analysis, '_', pers))
  pmap(list(ls, filenames), plot_dist)
}
join_results <- function(list_us, list_ger){
  
  US = "US"
  GER = "GER"

  list_us <- list_us %>% 
    map(~cbind(.x, US) %>% rename(country=US))
  list_ger <- list_ger %>% 
    map(~cbind(.x, GER) %>% rename(country=GER))
  
  map2(list_us, list_ger, rbind)
  
}
join_results(results_us$cox_onset_prev_hr, results_ger$cox_onset_prev_hr) %>% 
  plot_all_dist(analysis = 'cox_onset_prev_hr')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_slope_prev, results_ger$lm_slope_prev) %>% 
  plot_all_dist(analysis = 'lm_slope_prev')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_slope_prev_var, results_ger$lm_slope_prev_var) %>% 
  plot_all_dist(analysis = 'lm_slope_prev_var')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_cases_0415, results_ger$lm_cases_0331) %>% 
  plot_all_dist(analysis = 'lm_cases_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_cases_0930, results_ger$lm_cases_0930) %>% 
  plot_all_dist(analysis = 'lm_cases_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_deaths_0415, results_ger$lm_deaths_0331) %>% 
  plot_all_dist(analysis = 'lm_deaths_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_deaths_0930, results_ger$lm_deaths_0930) %>% 
  plot_all_dist(analysis = 'lm_deaths_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$cox_onset_socdist_hr, results_ger$cox_onset_socdist_hr) %>% 
  plot_all_dist(analysis = 'cox_onset_socdist_hr')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_mean_socdist, results_ger$lm_mean_socdist) %>% 
  plot_all_dist(analysis = 'lm_mean_socdist')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

calculate_effsize <- function(df){
  df %>% summarise(d = effsize::cohen.d(estimate ~ country)$estimate, 
                   pval = t.test(estimate ~ country, var.equal = TRUE)$p.value)
}

all_ttest <- map2(results_us, results_ger, join_results) %>% flatten() %>%
  map(calculate_effsize) %>% bind_rows(.id = "model")
models <- rep(names(results_us), each=5)
all_ttest <- cbind(models, all_ttest)

all_ttest
save(all_ttest, file='ttest_results.RData')
======= COVID-19 US

Prepare county level data

Read and format prevalence data

df_us_prev <- read_csv('US_prevalence.csv')

df_us_prev <- df_us_prev %>% 
  select(fips, date, rate) %>% 
  mutate(date = as.Date(date, "%d%b%Y")) %>% 
  dplyr::rename(county_fips = fips, 
         rate_day = rate) %>%
  mutate(county_fips = as.character(county_fips)) %>% 
  filter(rate_day >= 0 & rate_day <= 1000) %>%
  filter(date <= '2020-04-15')
  
df_us_prev %>% write_csv('us_prev_fips.csv')
df_us_death <- read_csv('US_prevalence.csv')

df_us_death <-df_us_death %>%
  mutate(date = as.Date(date, "%d%b%Y"), 
         fips = as.character(fips))

df_us_death_0930 <- df_us_death %>% 
  filter(date == '2020-09-30') %>% 
  dplyr::rename(county_fips = fips,
         case_rate_0930 = rate,
         death_rate_0930 = rate_death) %>% 
  select(county_fips, case_rate_0930, death_rate_0930)

df_us_death_0415 <- df_us_death %>% 
  filter(date == '2020-04-15') %>% 
  dplyr::rename(county_fips = fips,
         case_rate_0415 = rate,
         death_rate_0415 = rate_death) %>% 
  select(county_fips, case_rate_0415, death_rate_0415)

df_us_death <- merge(df_us_death_0415, df_us_death_0930)

# save df
df_us_death %>% write_csv('us_death_county_maps.csv')

Read and format personality data

df_us_pers <- read_csv('US_personality_2.csv')

df_us_pers %>% filter(freq >= 50) %>% .$freq %>% sd()
## [1] 3175.839
df_us_pers <- df_us_pers %>%
  dplyr::rename(county_fips = county,
         pers_o = open, 
         pers_c = sci,
         pers_e = extra,
         pers_a = agree,
         pers_n = neuro) %>%
  filter(freq >= 50) %>% 
  select(-county_name, -freq) %>%
  mutate(county_fips = as.character(county_fips))

df_us_pers %>% .$freq %>% mean
## [1] NA

Read and format county level controls

df_us_ctrl <- read.csv('US_controls_2.csv')

df_us_ctrl <- df_us_ctrl %>% select(-county_name) %>%
  rename(county_fips = county,
         airport_dist = airport_distance,
         conservative = republican,
         age = medage,
         healthcare = physician_pc) %>%
  mutate(county_fips = as.character(county_fips), 
         male=male*100,
         tourism=tourism*100,
         manufact=manufact*100,
         academics=academics*100)

df_us_temp <- read_csv("US_controls_weather.csv") %>% 
  filter(month %in% c('Mar')) %>% 
  group_by(fips) %>% summarise(temp = mean(tempc)) %>%
  rename(county_fips = fips) %>% 
  mutate(county_fips = as.character(as.numeric(county_fips)))

df_us_ctrl <- df_us_ctrl %>% plyr::join(df_us_temp, 'county_fips')

Read and format social distancing data FB

fb_files <- list.files('../FB Data/US individual files',
                       '*.csv', full.names = T)

df_us_socdist <- fb_files %>% 
  map(read_csv) %>% 
  bind_rows()

df_us_socdist <- df_us_socdist %>%
  select(-age_bracket, -gender, -baseline_name, 
         -baseline_type, -polygon_name) %>%
  rename(date = ds,
         county_fips = polygon_id,
         socdist_tiles = all_day_bing_tiles_visited_relative_change,
         socdist_single_tile = all_day_ratio_single_tile_users) %>%
  mutate(county_fips = as.character(county_fips))
 
# save object for descritives 
df_us_socdist_desc <- df_us_socdist

Merge prevalence data

# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
                          max(df_us_prev$date), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

# join data frames
df_us_prev <- df_us_prev %>%
  plyr::join(df_us_ctrl, by='county_fips') %>% 
  plyr::join(df_us_pers, by='county_fips') %>%
  merge(df_dates, by='date') %>% 
  arrange(county_fips, date) %>% 
  drop_na()

df_us_prev %>% select(-date, -rate_day, -time) %>% 
  distinct() %>% write_csv('df_us_pers_fips.csv')

df_us_prev %>% select(county_fips) %>% distinct() %>% nrow()
## [1] 2486
# join data frames 
df_us_death <- df_us_death %>%
  plyr::join(df_us_ctrl, by='county_fips') %>% 
  plyr::join(df_us_pers, by='county_fips') %>%
  drop_na()

# save df
df_us_death %>% 
  #select(county_fips, case_rate, death_rate) %>% 
  write_csv('us_death_fips_controls.csv')

Merge social distancing data

# create sequence of dates
date_sequence <- seq.Date(min(df_us_socdist$date),
                          as.Date('2020-04-28'), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

# join data frames 
df_us_socdist <- df_us_socdist %>%
  plyr::join(df_us_ctrl, by='county_fips') %>% 
  plyr::join(df_us_pers, by='county_fips') %>%
  inner_join(df_dates, by='date') %>% 
  arrange(county_fips, date)

fips_complete <- df_us_socdist %>% 
  group_by(county_fips) %>% 
  summarise(n = n()) %>% 
  filter(! n<max(n)) %>% .$county_fips

df_us_socdist <- df_us_socdist %>%
  filter(county_fips %in% fips_complete)

df_us_socdist %>% select(county_fips) %>% distinct() %>% nrow()
## [1] 2582

Control for weekend effect

easter <- seq.Date(as.Date('2020-04-10'), as.Date('2020-04-13'), 1)

df_us_loess <- df_us_socdist %>% 
  mutate(weekday = format(date, '%u')) %>% 
  filter(!weekday %in% c('6','7') | date %in% easter) %>% 
  split(.$county_fips) %>%
  map(~ loess(socdist_single_tile ~ time, data = .)) %>%
  map(predict, 1:max(df_us_socdist$time)) %>% 
  bind_rows() %>% 
  gather(key = 'county_fips', value = 'loess') %>% 
  group_by(county_fips) %>% 
  mutate(time = row_number())

df_us_loess_2 <- df_us_socdist %>% 
  mutate(weekday = format(date, '%u')) %>% 
  filter(!weekday %in% c('6','7') | date %in% easter) %>% 
  split(.$county_fips) %>%
  map(~ loess(socdist_tiles ~ time, data = .)) %>%
  map(predict, 1:max(df_us_socdist$time)) %>% 
  bind_rows() %>% 
  gather(key = 'county_fips', value = 'loess') %>% 
  rename(loess_2 = loess) %>%
  group_by(county_fips) %>% 
  mutate(time = row_number())

df_us_socdist <- df_us_socdist %>% 
  merge(df_us_loess, by=c('county_fips', 'time')) %>% 
  merge(df_us_loess_2, by=c('county_fips', 'time')) %>% 
  mutate(weekday = format(date, '%u')) %>% 
  mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7') | date %in% easter, 
                                            loess, socdist_single_tile),
         socdist_tiles_clean = ifelse(weekday %in% c('6','7') | date %in% easter, 
                                            loess_2, socdist_tiles)) %>%
  arrange(county_fips, time) %>% 
  select(-weekday)

df_us_socdist <- df_us_socdist %>% drop_na() %>% mutate(time = time-1)

Pick cleaned social distancing metrics

df_us_socdist <- df_us_socdist %>% 
  mutate(socdist_single_tile = socdist_single_tile_clean,
         socdist_tiles = socdist_tiles_clean) %>% 
  select(-loess, -loess_2, -socdist_single_tile_clean, -socdist_tiles_clean)

Define outcomes of interest

COVID - Extract onset

# get onset day
df_us_onset_prev <- df_us_prev %>% 
  group_by(county_fips) %>%
  filter(rate_day > 0.1) %>%
  summarise(onset_prev = min(time))

# merge with county data
df_us_onset_prev <- df_us_prev %>%
  select(-date, -time, -rate_day) %>%
  distinct() %>%
  mutate(county_fips = as.character(county_fips)) %>%
  left_join(df_us_onset_prev, by = 'county_fips')

# handle censored data
df_us_onset_prev <- df_us_onset_prev %>%
  mutate(event = ifelse(is.na(onset_prev), 0, 1)) %>%
  mutate(onset_prev = replace_na(onset_prev, as.numeric(diff(range(df_us_prev$date)))+1))

COVID - Extract slopes

# cut time series
df_us_prev <- df_us_prev %>% 
  filter(date > '2020-03-15' & date < '2020-04-15')

# extract slope prevalence
df_us_slope_prev <- df_us_prev %>% 
  split(.$county_fips) %>% 
  map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
  map(coef) %>% 
  map_dbl('time') %>% 
  as.data.frame() %>% 
  rownames_to_column('county_fips') %>% 
  rename(slope_prev = '.')
  
# merge with county data
df_us_slope_prev <- df_us_onset_prev %>% 
  inner_join(df_us_slope_prev, by = 'county_fips')

# get unscaled object for descriptives
df_us_prev_desc <- df_us_slope_prev
# cut time series before onset
df_us_slope_var <- df_us_prev %>% 
  filter(date <= '2020-05-15') %>%
  group_by(county_fips) %>% 
  filter(rate_day > 0.1) %>%
  mutate(time = time-min(time)+1) %>% 
  ungroup() %>%
  filter(time <= 30)

# extract slope prevalence
df_us_slope_var <- df_us_slope_var %>% 
  split(.$county_fips) %>% 
  map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
  map(coef) %>% 
  map_dbl('time') %>% 
  as.data.frame() %>% 
  rownames_to_column('county_fips') %>% 
  rename(slope_prev_var = '.')

# merge with county data
df_us_slope_prev <- df_us_slope_prev %>% 
  left_join(df_us_slope_var, by = 'county_fips') %>%
  mutate(slope_prev_var = replace_na(slope_prev_var, 0))

Social Distancing - Change point analysis

# keep only counties with full data
fips_complete <- df_us_socdist %>% 
  group_by(county_fips) %>% 
  summarise(n = n()) %>% 
  filter(n==max(.$n)) %>% 
  .$county_fips

# run changepoint analysis
df_us_socdist_cpt_results <- df_us_socdist %>% 
  select(county_fips, socdist_single_tile) %>%
  filter(county_fips %in% fips_complete) %>% 
  split(.$county_fips) %>%
  map(~ cpt.meanvar(as.vector(.$socdist_single_tile),
                    #penalty = 'Asymptotic',
                    class=TRUE,
                    param.estimates=TRUE,
                    Q=1,
                    test.stat = 'Normal'))

df_us_socdist_cpt_results_2 <- df_us_socdist %>% 
  select(county_fips, socdist_tiles) %>%
  filter(county_fips %in% fips_complete) %>% 
  split(.$county_fips) %>%
  map(~ cpt.meanvar(as.vector(.$socdist_tiles),
                    #penalty = 'Asymptotic',
                    class=TRUE,
                    param.estimates=TRUE,
                    Q=1,
                    test.stat = 'Normal'))

# calculate change point
df_us_socdist_cpt_day <- df_us_socdist_cpt_results %>% 
  map(cpts) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(cpt_day_socdist = '.') %>%
  rownames_to_column('county_fips')

df_us_socdist_cpt_day_2 <- df_us_socdist_cpt_results_2 %>% 
  map(cpts) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(cpt_day_socdist_2 = '.') %>%
  rownames_to_column('county_fips')

# calculate mean differences
df_us_socdist_cpt_mean_diff <- df_us_socdist_cpt_results %>% 
  map(param.est) %>% 
  map(~ .$mean) %>% 
  map(~ .[2]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(mean_diff_socdist = '.') %>%
  rownames_to_column('county_fips')

df_us_socdist_cpt_mean_diff_2 <- df_us_socdist_cpt_results_2 %>% 
  map(param.est) %>% 
  map(~ .$mean) %>% 
  map(~ -.[2]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(mean_diff_socdist_2 = '.') %>%
  rownames_to_column('county_fips')

# calculate means
df_us_socdist_mean <- df_us_socdist %>%
  group_by(county_fips) %>%
  summarise(mean_socdist = mean(socdist_single_tile))

df_us_socdist_mean_2 <- df_us_socdist %>%
  group_by(county_fips) %>%
  summarise(mean_socdist_2 = -mean(socdist_tiles))

# merge with county data
df_us_cpt_socdist <- df_us_socdist %>% 
  select(-time, -date, -socdist_single_tile, -socdist_tiles) %>%
  distinct() %>% 
  left_join(df_us_socdist_cpt_day, by='county_fips') %>%
  left_join(df_us_socdist_cpt_day_2, by='county_fips') %>%
  left_join(df_us_socdist_cpt_mean_diff, by='county_fips') %>%
  left_join(df_us_socdist_cpt_mean_diff_2, by='county_fips') %>%
  left_join(df_us_socdist_mean, by='county_fips') %>%
  left_join(df_us_socdist_mean_2, by='county_fips') %>%
  left_join(select(df_us_onset_prev, county_fips, onset_prev), by='county_fips') %>%
  left_join(select(df_us_slope_prev, county_fips, slope_prev), by='county_fips') 

# handle censored data
df_us_cpt_socdist <- df_us_cpt_socdist %>% 
  mutate(cpt_day_socdist = ifelse(is.na(cpt_day_socdist), as.numeric(diff(range(df_us_socdist$date))), cpt_day_socdist)) %>% 
  mutate(event = ifelse(cpt_day_socdist >= as.numeric(diff(range(df_us_socdist$date))), 0, 1))

Remove incomplete cases

df_us_prev_unscaled <- inner_join(df_us_slope_prev, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_death_unscaled <- inner_join(df_us_death, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_socdist_unscaled <- inner_join(df_us_cpt_socdist, df_us_slope_prev %>% select(county_fips), by='county_fips')

fips_list <- df_us_prev_unscaled %>% select(county_fips)

df_us_prev_unscaled %>% merge(df_us_death_unscaled %>% select(county_fips:death_rate_0930))  %>%
  merge(df_us_socdist_unscaled %>% select(county_fips, cpt_day_socdist_2, mean_diff_socdist_2)) %>% 
  select(-event) %>% write_csv('us_all_variables.csv')

Rescale Data

df_us_prev_scaled <- df_us_prev_unscaled %>% 
  mutate_at(vars(-county_fips, -event), scale)
df_us_death_scaled <- df_us_death_unscaled %>% 
  mutate_at(vars(-county_fips), scale)
df_us_socdist_scaled <- df_us_socdist_unscaled %>%
  mutate_at(vars(-county_fips, -event), scale)

Calculate spatial lags

# create UDF to calculate lagged variables
calc_lags <- function(df, weights, cols){
  
  cols_only <- df %>% select(cols)
  cols_lag <- weights %*% as.matrix(cols_only) %>% 
  as.matrix() %>% as.data.frame()
names(cols_lag) <- paste0(names(cols_lag), '_lag')

return(cols_lag)
}
# read_weight matrix 
weight_mat_norm <- read_csv('df_us_spatial_weights_matrix.csv')
## 
## -- Column specification --------------------------------------------------------------------
## cols(
##   .default = col_double()
## )
## i Use `spec()` for the full column specifications.
weight_mat_norm <- weight_mat_norm %>% select(-county_fips) %>% as.matrix()
# generate spatially lagged y 
y_only <- df_us_prev_scaled %>% select(onset_prev,slope_prev,slope_prev_var) %>% names()
y_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, y_only)

# generate spatially lagged X
X_only <- df_us_prev_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, X_only)

# bind new variables to df
df_us_prev_scaled <- cbind(df_us_prev_scaled, y_lag, X_lag)
# generate spatially lagged y 
y_only <- df_us_death_scaled %>% select(case_rate_0415:death_rate_0930) %>% names()
y_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, y_only)

# generate spatially lagged X
X_only <- df_us_death_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, X_only)

# bind new variables to df
df_us_death_scaled <- cbind(df_us_death_scaled, y_lag, X_lag)
# generate spatially lagged y 
y_only <- df_us_socdist_scaled %>% select(cpt_day_socdist_2,mean_diff_socdist_2) %>% names()
y_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, y_only)

# generate spatially lagged X
X_only <- df_us_socdist_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, X_only)

# bind new variables to df
df_us_socdist_scaled <- cbind(df_us_socdist_scaled, y_lag, X_lag)
write_csv(df_us_prev_scaled, 'df_us_slope_prev.csv')
write_csv(df_us_death_scaled, 'df_us_death_scaled.csv')
write_csv(df_us_socdist_scaled, 'df_us_cpt_socdist.csv')

Run Specification Curve Analysis

# define function to calculate specification curve analyses for all traits
spec_calculate <- function(df, y, model, controls, all.comb = T){
  
  spec_results_o <- run_specs(df = df, 
                       y = c(y), x = c("pers_o"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_c <- run_specs(df = df, 
                       y = c(y), x = c("pers_c"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_e <- run_specs(df = df, 
                       y = c(y), x = c("pers_e"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_a <- run_specs(df = df, 
                       y = c(y), x = c("pers_a"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results_n <- run_specs(df = df, 
                       y = c(y), x = c("pers_n"), 
                       model = c(model), controls = controls, 
                       all.comb = all.comb)
  
  spec_results <- list(spec_results_o, spec_results_c, spec_results_e, 
                      spec_results_a, spec_results_n)
  
  names(spec_results) <- list('spec_results_o', 'spec_results_c', 'spec_results_e', 
                      'spec_results_a', 'spec_results_n')

  return(spec_results)
  }
# adapted from specr package code
format_results <- function(df, var, null = 0, desc = FALSE) {

  # rank specs
  if (isFALSE(desc)) {
    df <- df %>%
      dplyr::arrange(!! var)
  } else {
    df <- df %>%
      dplyr::arrange(desc(!! var))
  }

  # create rank variable and color significance
  df <- df %>%
    dplyr::mutate(specifications = 1:nrow(df),
                  color = case_when(conf.low > null ~ "#377eb8",
                                    conf.high < null ~ "#e41a1c",
                                    TRUE ~ "darkgrey"))
  return(df)
}


# define function to plot single specification curve
plot_curves <- function(results, filename, hr=F){
  
  file_path_eps <- paste0("../../Plots/US/", filename,".eps")
  file_path_pdf <- paste0("../../Plots/US/", filename,".pdf")
  file_path_png <- paste0("../../Plots/US/", filename,".png")

  if(hr==F){
    
    results %>%
    format_results(var = .$estimate, null = 0, desc = F) %>%
    ggplot(aes(x = specifications,
               y = estimate,
               ymin = conf.low,
               ymax = conf.high,
               color = color)) +
    geom_point(aes(color = color),
               size = 1) +
    theme_minimal() +
    scale_color_identity() +
    theme(strip.text = element_blank(),
          axis.line = element_line("black", size = .5),
          legend.position = "none",
          panel.spacing = unit(.75, "lines"),
          axis.text = element_text(colour = "black")) +
    labs(x = "") +
      geom_pointrange(alpha = 0.05,
                      size = .6,
                      fatten = 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    labs(x = "", y = "standarized coefficient") + 
      coord_fixed(ratio = 2000, ylim = c(-0.4, 0.4)) +
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black")) +
      ggsave(file=file_path_eps, device = 'eps')+
      ggsave(file=file_path_pdf, device = 'pdf')+
      ggsave(file=file_path_png, device = 'png')
    
  }else{
    
    results %>%
    format_results(var = .$estimate, null = 1, desc = F) %>%
    ggplot(aes(x = specifications,
               y = estimate,
               ymin = conf.low,
               ymax = conf.high,
               color = color)) +
    geom_point(aes(color = color),
               size = 1) +
    theme_minimal() +
    scale_color_identity() +
    theme(strip.text = element_blank(),
          axis.line = element_line("black", size = .5),
          legend.position = "none",
          panel.spacing = unit(.75, "lines"),
          axis.text = element_text(colour = "black")) +
    labs(x = "") +
      geom_pointrange(alpha = 0.05,
                      size = .6,
                      fatten = 1) +
    geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
    labs(x = "", y = "standarized coefficient") + 
      coord_fixed(ratio = 2000, ylim = c(0.6, 1.4)) +
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(), 
      axis.line = element_line(colour = "black")) +
      ggsave(file=file_path_eps, device = 'eps')+
      ggsave(file=file_path_pdf, device = 'pdf')+
      ggsave(file=file_path_png, device = 'png')  
    }
  
}

# define function to plot all curves in list
plot_all_curves <- function(ls, analysis, hr=F){
  
  pers <- c('o', 'c', 'e', 'a', 'n')
  filenames <- as.list(paste0(analysis, '_', pers))
  hr <- as.list(rep(hr, 5))
  pmap(list(ls, filenames, hr), plot_curves)
}
# define function to calculate summary stats for single specification curve
calc_summary <- function(df){
  
  dft <- df %>% select(estimate:fit_nobs)
  dft <- dft %>% mutate(significant = as.numeric(p.value<0.05),
                        positive = as.numeric(statistic>0), 
                        negative = as.numeric(statistic<0), 
                        significant_positive = as.numeric(p.value<0.05 & statistic>0),
                        significant_negative = as.numeric(p.value<0.05 & statistic<0))
  
  mean_temp <- dft %>% map_if(is.numeric, mean, .else=NULL) %>% as.data.frame()
  sd_temp <- dft %>% map_if(is.numeric, sd, .else=NULL) %>% as.data.frame()
  
  df_temp <- rbind(mean_temp, sd_temp)
  row.names(df_temp) <- c('mean', 'sd')
  
  return(df_temp)
}

# define function to calculate summary stats for all curves in list
calc_all_summaries <- function(ls){
  
  ls <- ls %>% map(calc_summary)
  names(ls) <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')
  return(ls)
}
coef_to_hr <- function(df){
  
  df %>% mutate(conf.low = exp(estimate-1.96*std.error),
           conf.high = exp(estimate+1.96*std.error),
           estimate = exp(estimate)) 
}

filter_socdist <- function(df){
  
  df %>% filter(str_detect(controls, 'onset_prev') & 
                  str_detect(controls, 'slope_prev'))
}
covariates <- c("airport_dist", "conservative", 'male', 'age', 'popdens',
                'manufact', 'tourism', 'academics', 'medinc', 'healthcare',
                'temp')

Predict Onset

cox_model <- function(formula, data){
  formula <- as.formula(formula)
  coxph(formula = formula, data = data)}
cox_onset_prev <- spec_calculate(df = df_us_prev_scaled, 
               y = "Surv(onset_prev, event)", 
               model = "cox_model", 
               controls = covariates %>% append('onset_prev_lag'),
               all.comb = T)

cox_onset_prev_hr <- cox_onset_prev %>% map(coef_to_hr)

calc_all_summaries(cox_onset_prev_hr)
## $pers_o
##        estimate   std.error statistic     p.value   conf.low  conf.high fit_n
## mean 1.16172145 0.026014376  5.766790 0.007433825 1.10404476 1.22241719  2366
## sd   0.06206793 0.001120517  2.218639 0.040648218 0.06034861 0.06388357     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          638.2887    2.434685e-35         846.4471
## sd            0          136.8117    1.056543e-33         203.5470
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   1.186354e-35           769.6777     9.006769e-36                   NA
## sd     4.978832e-34           172.8896     3.775740e-34                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23515525         0.9999908      0.68063283
## sd                   NA    0.04511866         0.0000000      0.01744978
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0065112174 -13403.98321 26821.9664 26860.8760     2366
## sd                0.0002305437     68.40587   134.2635   127.4085        0
##      significant positive negative significant_positive significant_negative
## mean   0.9633789        1        0            0.9633789                    0
## sd     0.1878526        0        0            0.1878526                    0
## 
## $pers_c
##        estimate   std.error statistic     p.value   conf.low  conf.high fit_n
## mean 1.14595693 0.023675413  5.720862 0.001312684 1.09398662 1.20039740  2366
## sd   0.04204126 0.000533867  1.536933 0.009826545 0.03991021 0.04431706     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          634.9587    6.334368e-13         836.1058
## sd            0          158.0395    4.053783e-11         223.5635
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   5.061211e-13           753.4356     4.799701e-13                   NA
## sd     3.238962e-11           188.4106     3.071593e-11                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23363277         0.9999908      0.67911602
## sd                   NA    0.05275286         0.0000000      0.02191225
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0064685052 -13405.64817 26825.2963 26864.2060     2366
## sd                0.0002280484     79.01976   155.5847   148.9729        0
##      significant positive negative significant_positive significant_negative
## mean  0.99414062        1        0           0.99414062                    0
## sd    0.07633129        0        0           0.07633129                    0
## 
## $pers_e
##       estimate    std.error statistic   p.value   conf.low  conf.high fit_n
## mean 1.0053968 0.0230413250 0.2407622 0.5618723 0.96101875 1.05182497  2366
## sd   0.0210153 0.0004509163 0.9189734 0.2847874 0.02088586 0.02111908     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          601.0713    3.043490e-07         811.6531
## sd            0          156.6765    1.947733e-05         226.5546
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   3.236460e-07           731.9152     3.223853e-07                   NA
## sd     2.071186e-05           193.9189     2.063117e-05                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.22261503         0.9999908      0.67472760
## sd                   NA    0.05272031         0.0000000      0.02245513
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0065180540 -13422.59188 26859.1838 26898.0934     2366
## sd                0.0002496992     78.33827   154.1384   147.2827        0
##      significant  positive  negative significant_positive significant_negative
## mean  0.06713867 0.5056152 0.4943848           0.06713867                    0
## sd    0.25029256 0.5000295 0.5000295           0.25029256                    0
## 
## $pers_a
##        estimate    std.error statistic    p.value   conf.low  conf.high fit_n
## mean 1.12712077 0.0241346132  4.911333 0.02630132 1.07503821 1.18172942  2366
## sd   0.05222035 0.0007992203  1.927298 0.11581101 0.04966844 0.05496148     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          627.6168    7.129527e-12         832.9647
## sd            0          157.1791    4.562814e-10         223.8587
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   5.323420e-12            751.675     5.166855e-12                   NA
## sd     3.406905e-10            190.272     3.306702e-10                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23126981         0.9999908      0.67999298
## sd                   NA    0.05263276         0.0000000      0.02169922
##      fit_std.error.concordance   fit_logLik    fit_AIC    fit_BIC fit_nobs
## mean              0.0064707427 -13409.31914 26832.6383 26871.5479     2366
## sd                0.0002249616     78.58953   154.7361   148.1604        0
##      significant   positive    negative significant_positive
## mean   0.9313965 0.99462891 0.005371094            0.9313965
## sd     0.2528096 0.07309959 0.073099587            0.2528096
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##        estimate    std.error statistic     p.value   conf.low  conf.high fit_n
## mean 0.87994919 0.0254283747 -5.139354 0.006040369 0.83711030 0.92498283  2366
## sd   0.04229444 0.0008502208  2.109454 0.019840097 0.03905764 0.04576956     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       1917          631.3160    1.042710e-34         830.4507
## sd            0          135.0754    6.242899e-33         207.2895
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   8.175495e-34           746.0484     4.523746e-34                   NA
## sd     4.972358e-32           170.8610     2.735136e-32                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.23293232         0.9999908      0.67921943
## sd                   NA    0.04458783         0.0000000      0.01770616
##      fit_std.error.concordance  fit_logLik   fit_AIC    fit_BIC fit_nobs
## mean              0.0064412561 -13407.4695 26828.939 26867.8487     2366
## sd                0.0001869198     67.5377   132.492   125.5353        0
##      significant positive negative significant_positive significant_negative
## mean   0.9619141        0        1                    0            0.9619141
## sd     0.1914271        0        0                    0            0.1914271
plot_all_curves(cox_onset_prev_hr, 'cox_onset_prev_hr', hr = T)
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

cox_onset_prev_ctrl <- coxph(Surv(onset_prev, event) ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + onset_prev_lag, 
                             data = df_us_prev_scaled)

summary(cox_onset_prev_ctrl)
## Call:
## coxph(formula = Surv(onset_prev, event) ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + onset_prev_lag, data = df_us_prev_scaled)
## 
##   n= 2366, number of events= 1917 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## airport_dist   -0.06929   0.93306  0.02959 -2.341 0.019219 *  
## conservative   -0.26778   0.76508  0.02758 -9.709  < 2e-16 ***
## male           -0.10249   0.90259  0.02677 -3.828 0.000129 ***
## age            -0.11380   0.89244  0.02439 -4.666 3.07e-06 ***
## popdens         0.03785   1.03857  0.02364  1.601 0.109371    
## manufact        0.10299   1.10848  0.02806  3.671 0.000242 ***
## tourism         0.05239   1.05379  0.02662  1.968 0.049029 *  
## academics       0.17781   1.19460  0.04482  3.968 7.26e-05 ***
## medinc          0.33067   1.39190  0.03620  9.134  < 2e-16 ***
## healthcare      0.11392   1.12067  0.02693  4.230 2.34e-05 ***
## temp            0.32833   1.38865  0.03005 10.926  < 2e-16 ***
## onset_prev_lag -0.31413   0.73042  0.04236 -7.416 1.21e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## airport_dist      0.9331     1.0717    0.8805    0.9888
## conservative      0.7651     1.3071    0.7248    0.8076
## male              0.9026     1.1079    0.8565    0.9512
## age               0.8924     1.1205    0.8508    0.9361
## popdens           1.0386     0.9629    0.9916    1.0878
## manufact          1.1085     0.9021    1.0492    1.1712
## tourism           1.0538     0.9490    1.0002    1.1102
## academics         1.1946     0.8371    1.0941    1.3043
## medinc            1.3919     0.7184    1.2966    1.4942
## healthcare        1.1207     0.8923    1.0630    1.1814
## temp              1.3886     0.7201    1.3092    1.4729
## onset_prev_lag    0.7304     1.3691    0.6722    0.7937
## 
## Concordance= 0.709  (se = 0.006 )
## Likelihood ratio test= 896.6  on 12 df,   p=<2e-16
## Wald test            = 1059  on 12 df,   p=<2e-16
## Score (logrank) test = 1203  on 12 df,   p=<2e-16

Predict Slopes

# fixed time windows
lm_slope_prev <- spec_calculate(df = df_us_prev_scaled, 
               y = "slope_prev", 
               model = "lm", 
               controls = covariates %>% append('slope_prev_lag'),
               all.comb = T)

calc_all_summaries(lm_slope_prev)
## $pers_o
##        estimate    std.error statistic   p.value   conf.low conf.high
## mean 0.06650710 0.0218723943  3.051989 0.1060491 0.02361598 0.1093982
## sd   0.03848726 0.0007661755  1.803760 0.2188002 0.03867878 0.0383537
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean    0.16877711        0.16632951 0.9128307      70.25183 1.477756e-24
## sd      0.03743269        0.03708676 0.0202677      14.16632 5.895233e-23
##        fit_df  fit_logLik   fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3136.82893 6291.6579 6343.57847    1965.8421     2358.000000
## sd   1.732262    53.08913  103.6386   96.63557      88.5283        1.732262
##      fit_nobs significant  positive   negative significant_positive
## mean     2366   0.7268066 0.9838867 0.01611328            0.7268066
## sd          0   0.4456537 0.1259266 0.12592662            0.4456537
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##        estimate    std.error statistic    p.value   conf.low  conf.high
## mean 0.08159129 0.0195674938  4.182360 0.01737430 0.04322001 0.11996257
## sd   0.02905970 0.0004474735  1.515199 0.06422916 0.02934306 0.02880029
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17120959        0.16877025 0.91146684      71.33624 8.942856e-10
## sd      0.03958576        0.03924605 0.02140208      15.26681 4.423700e-08
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3133.22547 6284.4509 6336.3715   1960.08931     2358.000000
## sd   1.732262    55.91484  109.2984  102.3039     93.62033        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9228516        1        0            0.9228516
## sd          0   0.2668594        0        0            0.2668594
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.00674942 0.0191251166 0.3431732 0.6112414 -0.03075437 0.04425321
## sd   0.01393104 0.0004257969 0.7179348 0.2737243  0.01343339 0.01445982
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.16442606        0.16196752 0.91517150      67.74067 6.606324e-07
## sd      0.04118872        0.04084978 0.02221979      15.14588 3.531075e-05
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3142.77640 6303.5528 6355.4734   1976.13237     2358.000000
## sd   1.732262    57.88175  113.2271  106.2052     97.41132        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.02978516 0.6547852 0.3452148           0.02978516
## sd          0  0.17001487 0.4754963 0.4754963           0.17001487
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic     p.value   conf.low  conf.high
## mean 0.11095509 0.0199702382  5.583480 0.006947667 0.07179404 0.15011614
## sd   0.03633964 0.0005199555  1.888186 0.042481328 0.03686347 0.03583717
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17639488        0.17397035 0.90861824      74.12927 4.806967e-13
## sd      0.03882516        0.03848915 0.02103938      15.64261 2.739705e-11
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.83790 6269.6758 6321.5964   1947.82611     2358.000000
## sd   1.732262    55.10243  107.6935  100.7668     91.82149        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9714355        1        0            0.9714355
## sd          0   0.1665992        0        0            0.1665992
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic   p.value    conf.low   conf.high
## mean -0.06960205 0.0203062105 -3.442200 0.1089957 -0.10942193 -0.02978217
## sd    0.03665124 0.0004166468  1.838027 0.2464262  0.03637535  0.03694315
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.16969364        0.16724876 0.91232268      70.72756 9.620287e-22
## sd      0.03779965        0.03745861 0.02046116      14.60225 5.487729e-20
##        fit_df fit_logLik   fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3135.5009 6289.0018 6340.92240   1963.67454     2358.000000
## sd   1.732262    53.5602  104.6036   97.66964     89.39616        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.8081055 0.02587891 0.9741211                    0
## sd          0   0.3938387 0.15879340 0.1587934                    0
##      significant_negative
## mean            0.8081055
## sd              0.3938387
plot_all_curves(lm_slope_prev, 'lm_slope_prev')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_slope_prev_ctrl <- lm(slope_prev ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + slope_prev_lag, 
                             data = df_us_prev_scaled)

summary(lm_slope_prev_ctrl)
## 
## Call:
## lm(formula = slope_prev ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + slope_prev_lag, data = df_us_prev_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9236 -0.4960 -0.2000  0.2154  6.0413 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.003469   0.017857  -0.194  0.84600    
## airport_dist    0.008823   0.021113   0.418  0.67606    
## conservative   -0.247126   0.022130 -11.167  < 2e-16 ***
## male           -0.080222   0.019710  -4.070 4.85e-05 ***
## age            -0.052478   0.019239  -2.728  0.00642 ** 
## popdens         0.139040   0.020596   6.751 1.85e-11 ***
## manufact        0.006652   0.022098   0.301  0.76342    
## tourism        -0.008209   0.020752  -0.396  0.69245    
## academics      -0.084176   0.038229  -2.202  0.02777 *  
## medinc          0.263901   0.030260   8.721  < 2e-16 ***
## healthcare      0.041049   0.023823   1.723  0.08500 .  
## temp            0.219069   0.023035   9.510  < 2e-16 ***
## slope_prev_lag  0.303142   0.031452   9.638  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8684 on 2353 degrees of freedom
## Multiple R-squared:  0.2497, Adjusted R-squared:  0.2458 
## F-statistic: 65.25 on 12 and 2353 DF,  p-value: < 2.2e-16
# variable time windows
lm_slope_prev_var <- spec_calculate(df = df_us_prev_scaled, 
               y = "slope_prev_var", 
               model = "lm", 
               controls = covariates %>% append('slope_prev_var_lag'),
               all.comb = T)

calc_all_summaries(lm_slope_prev_var)
## $pers_o
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.01843360 0.0228448236 0.8092564 0.3374739 -0.02636443 0.06323163
## sd   0.03179557 0.0008021976 1.4119922 0.2928829  0.03186131 0.03180759
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09368869        0.09101223 0.95330359     35.311428 4.358225e-09
## sd      0.02737264        0.02697465 0.01414535      8.558473 1.601068e-07
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3239.79451 6497.58901 6549.50962    2143.4263     2358.000000
## sd   1.732262    35.73047   69.01074   62.48346      64.7363        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.2021484 0.7170410 0.2829590            0.1867676
## sd          0   0.4016514 0.4504917 0.4504917            0.3897724
##      significant_negative
## mean           0.01538086
## sd             0.12307716
## 
## $pers_c
##        estimate    std.error statistic     p.value   conf.low  conf.high
## mean 0.09199460 0.0203745098  4.529303 0.003265235 0.05204079 0.13194841
## sd   0.02635443 0.0003465441  1.337233 0.009768975 0.02675594 0.02596451
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.10121874        0.09856375 0.94935036     38.710210 2.822507e-11
## sd      0.02521521        0.02480452 0.01304579      8.221666 1.221468e-09
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3230.00003 6478.00006 6529.92067   2125.61769     2358.000000
## sd   1.732262    33.10894   63.71753   57.07268     59.63398        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9880371        1        0            0.9880371
## sd          0   0.1087321        0        0            0.1087321
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##          estimate    std.error  statistic   p.value     conf.low  conf.high
## mean -0.007067026 0.0199332406 -0.3585409 0.6301136 -0.046155523 0.03202147
## sd    0.010169162 0.0002821194  0.5126193 0.2278677  0.009878615 0.01048088
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09279622        0.09011762 0.95376562     34.846983 0.0001193256
## sd      0.02829475        0.02789763 0.01461407      8.898946 0.0044740979
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3240.92388 6499.84776 6551.76837   2145.53694     2358.000000
## sd   1.732262    36.85796   71.26061   64.69776     66.91708        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366           0 0.2226562 0.7773438                    0
## sd          0           0 0.4160802 0.4160802                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic    p.value   conf.low  conf.high
## mean 0.11504837 0.0208132613  5.555493 0.00127825 0.07423418 0.15586257
## sd   0.03195153 0.0004849158  1.619015 0.00604934 0.03262194 0.03129565
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.10543597        0.10279285 0.94712809     40.680990 8.095085e-15
## sd      0.02407654        0.02366633 0.01247244      8.311559 3.982934e-13
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3224.47345 6466.94691 6518.86751   2115.64393     2358.000000
## sd   1.732262    31.74214   60.99344   54.40905     56.94102        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366  0.99584961        1        0           0.99584961
## sd          0  0.06429754        0        0           0.06429754
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic   p.value    conf.low    conf.high
## mean -0.04812700 0.0211994748 -2.285890 0.1692657 -0.08969855 -0.006555458
## sd    0.03063361 0.0004145531  1.471557 0.2877701  0.03022191  0.031061141
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09553382        0.09286226 0.95234100     36.196044 9.356756e-12
## sd      0.02627732        0.02587729 0.01357951      8.357229 3.360019e-10
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3237.42430 6492.84860 6544.76920   2139.06251     2358.000000
## sd   1.732262    34.34926   66.24897   59.74882     62.14587        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.6511230 0.1003418 0.8996582                    0
## sd          0   0.4766732 0.3004919 0.3004919                    0
##      significant_negative
## mean            0.6511230
## sd              0.4766732
plot_all_curves(lm_slope_prev_var, 'lm_slope_prev_var')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_slope_prev_var_ctrl <- lm(slope_prev_var ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + slope_prev_var_lag, 
                             data = df_us_prev_scaled)

summary(lm_slope_prev_var_ctrl)
## 
## Call:
## lm(formula = slope_prev_var ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + slope_prev_var_lag, data = df_us_prev_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1372 -0.5606 -0.2239  0.3000  9.4699 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.002831   0.018948  -0.149 0.881224    
## airport_dist        0.021432   0.022402   0.957 0.338819    
## conservative       -0.223392   0.023463  -9.521  < 2e-16 ***
## male               -0.070245   0.020914  -3.359 0.000795 ***
## age                -0.060962   0.020410  -2.987 0.002847 ** 
## popdens             0.111035   0.021855   5.081 4.06e-07 ***
## manufact            0.037805   0.023416   1.614 0.106557    
## tourism            -0.026901   0.022017  -1.222 0.221893    
## academics          -0.131707   0.040554  -3.248 0.001180 ** 
## medinc              0.213479   0.032092   6.652 3.58e-11 ***
## healthcare          0.032853   0.025277   1.300 0.193814    
## temp                0.222065   0.024472   9.074  < 2e-16 ***
## slope_prev_var_lag  0.235435   0.034511   6.822 1.14e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9214 on 2353 degrees of freedom
## Multiple R-squared:  0.1553, Adjusted R-squared:  0.151 
## F-statistic: 36.04 on 12 and 2353 DF,  p-value: < 2.2e-16

Predict Cases

lm_cases_0415 <- spec_calculate(df = df_us_death_scaled, 
               y = "case_rate_0415", 
               model = "lm", 
               controls = covariates %>% append('case_rate_0415_lag'),
               all.comb = T)

calc_all_summaries(lm_cases_0415)
## $pers_o
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.04876701 0.0227720892  2.150331 0.1735715 0.004111615 0.09342241
## sd   0.03165959 0.0007998025  1.423872 0.2427764 0.031873299 0.03152256
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09943230        0.09677125 0.95027531      38.13421 1.473910e-15
## sd      0.02766227        0.02732592 0.01433976      10.44299 4.683503e-14
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.25867 6482.51733 6534.43794   2129.84261     2358.000000
## sd   1.732262    36.16042   70.17978   64.65003     65.42127        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.4975586 0.9543457 0.0456543            0.4975586
## sd          0   0.5000551 0.2087597 0.2087597            0.5000551
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.04262502 0.0203947736  2.104424 0.1466779 0.002631474 0.08261857
## sd   0.02066561 0.0004616577  1.037216 0.2339358 0.021205973 0.02015145
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09892688        0.09626521 0.95052224      37.78673 2.060162e-05
## sd      0.03004164        0.02971634 0.01556665      11.45476 9.415328e-04
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.82626 6483.65252 6535.57312   2131.03793     2358.000000
## sd   1.732262    39.13348   76.15729   70.67325     71.04847        1.732262
##      fit_nobs significant  positive   negative significant_positive
## mean     2366   0.6054688 0.9758301 0.02416992            0.6054688
## sd          0   0.4888094 0.1535952 0.15359524            0.4888094
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##         estimate    std.error statistic   p.value     conf.low   conf.high
## mean 0.012073121 0.0198842257 0.6045850 0.5663104 -0.026919260 0.051065502
## sd   0.009551023 0.0003069414 0.4745963 0.2407368  0.009344157 0.009790576
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.09705206        0.09438499 0.95151078      36.93931 0.0000119453
## sd      0.03006737        0.02973723 0.01556634      11.25001 0.0005722379
##        fit_df  fit_logLik   fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3235.28615 6488.5723 6540.49291   2135.47188     2358.000000
## sd   1.732262    39.11061   76.0876   70.52753     71.10934        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.01464844 0.8945312 0.1054688           0.01464844
## sd          0  0.12015567 0.3071940 0.3071940           0.12015567
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate   std.error statistic    p.value   conf.low  conf.high
## mean 0.07795076 0.020834282  3.766163 0.02775633 0.03709534 0.11880617
## sd   0.02637373 0.000592395  1.323556 0.09913529 0.02709396 0.02568587
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.1028759         0.1002257 0.94843600      39.58762 1.825308e-07
## sd       0.0300417         0.0297263 0.01560479      11.81480 7.034892e-06
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3227.62466 6473.24933 6525.16993   2121.69844     2358.000000
## sd   1.732262    39.29875   76.53274   71.18872     71.04863        1.732262
##      fit_nobs significant   positive     negative significant_positive
## mean     2366   0.9033203 0.99951172 0.0004882812            0.9033203
## sd          0   0.2955572 0.02209439 0.0220943887            0.2955572
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic   p.value    conf.low    conf.high
## mean -0.04774835 0.0211530060 -2.276129 0.1478689 -0.08922878 -0.006267932
## sd    0.02663904 0.0004849494  1.291895 0.2612650  0.02607382  0.027225747
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean    0.09947116        0.09681049 0.9502443      38.13183 2.304518e-12
## sd      0.02894177        0.02861880 0.0150099      11.11191 1.129695e-10
##        fit_df fit_logLik    fit_AIC  fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.1560 6482.31207 6534.233   2129.75070     2358.000000
## sd   1.732262    37.7909   73.49507   68.108     68.44729        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.6376953 0.05932617 0.9406738                    0
## sd          0   0.4807249 0.23626300 0.2362630                    0
##      significant_negative
## mean            0.6376953
## sd              0.4807249
plot_all_curves(lm_cases_0415, 'lm_cases_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_cases_0415_ctrl <- lm(case_rate_0415 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + case_rate_0415_lag, 
                             data = df_us_death_scaled)

summary(lm_cases_0415_ctrl)
## 
## Call:
## lm(formula = case_rate_0415 ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + case_rate_0415_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9724 -0.3185 -0.1425  0.0664 14.0442 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.002321   0.018957  -0.122 0.902545    
## airport_dist        0.039999   0.022414   1.785 0.074469 .  
## conservative       -0.192255   0.023487  -8.186 4.38e-16 ***
## male               -0.054249   0.020924  -2.593 0.009582 ** 
## age                -0.015480   0.020419  -0.758 0.448449    
## popdens             0.197538   0.021860   9.037  < 2e-16 ***
## manufact           -0.014696   0.023468  -0.626 0.531255    
## tourism            -0.015331   0.022028  -0.696 0.486509    
## academics          -0.148486   0.040571  -3.660 0.000258 ***
## medinc              0.242370   0.032107   7.549 6.24e-14 ***
## healthcare          0.034985   0.025290   1.383 0.166693    
## temp                0.155413   0.024405   6.368 2.29e-10 ***
## case_rate_0415_lag  0.187843   0.034440   5.454 5.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9219 on 2353 degrees of freedom
## Multiple R-squared:  0.1545, Adjusted R-squared:  0.1502 
## F-statistic: 35.83 on 12 and 2353 DF,  p-value: < 2.2e-16
lm_cases_0930 <- spec_calculate(df = df_us_death_scaled, 
               y = "case_rate_0930", 
               model = "lm", 
               controls = covariates %>% append('case_rate_0930_lag'),
               all.comb = T)

calc_all_summaries(lm_cases_0930)
## $pers_o
##         estimate   std.error statistic    p.value    conf.low   conf.high
## mean -0.08748817 0.020625280 -4.336426 0.06828373 -0.12793374 -0.04704261
## sd    0.04752857 0.001548285  2.489699 0.17839331  0.04636929  0.04884932
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.2576031         0.2554435 0.86051765     125.08352 1.479292e-08
## sd       0.1115422         0.1115596 0.06377168      61.40988 4.197068e-07
##        fit_df fit_logLik  fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2991.3823 6000.765 6052.6852    1755.7688     2358.000000
## sd   1.732262   173.8364  345.775  340.4391     263.7973        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.8127441 0.01391602 0.9860840                    0
## sd          0   0.3901644 0.11715678 0.1171568                    0
##      significant_negative
## mean            0.8127441
## sd              0.3901644
## 
## $pers_c
##        estimate    std.error statistic     p.value   conf.low conf.high
## mean 0.12308312 0.0184266866  6.583651 0.001060154 0.08694894 0.1592173
## sd   0.05683318 0.0009247264  2.811747 0.004025125 0.05550506 0.0581875
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.26752476        0.26538940 0.85530837     130.36518 3.170247e-32
## sd      0.09585838        0.09583076 0.05530759      53.15408 6.694621e-31
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2978.5255 5975.0511 6026.9717    1732.3039     2358.000000
## sd   1.732262   152.1392  302.3326  296.8751     226.7051        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##        estimate   std.error statistic    p.value   conf.low  conf.high
## mean 0.06069173 0.018035486 3.3280702 0.01604293 0.02532467 0.09605878
## sd   0.01982743 0.001245754 0.9305813 0.04801242 0.01817235 0.02163228
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean     0.2542249         0.2520547 0.8626706     122.18282 5.151662e-06
## sd       0.1072672         0.1072635 0.0612009      57.77144 9.231320e-05
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2997.8262 6013.6525 6065.5731    1763.7582     2358.000000
## sd   1.732262   166.5089  331.0699  325.5924     253.6869        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9135742        1        0            0.9135742
## sd          0   0.2810261        0        0            0.2810261
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate   std.error statistic      p.value   conf.low  conf.high
## mean 0.20856723 0.018548064 11.178947 1.455611e-10 0.17219502 0.24493944
## sd   0.06258162 0.000809816  3.068922 1.114284e-09 0.06164063 0.06354836
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.29257675        0.29051179 0.84079681     147.40723 4.922386e-58
## sd      0.08593704        0.08589113 0.05049305      51.53961 1.583845e-56
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2938.7056 5895.4112 5947.3318    1673.0560     2358.000000
## sd   1.732262   141.4434  280.9384  275.4853     203.2411        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate   std.error statistic      p.value    conf.low   conf.high
## mean -0.18517486 0.018911721 -9.704595 1.720658e-06 -0.22226019 -0.14808953
## sd    0.05329514 0.001037646  2.400281 2.810106e-05  0.05486468  0.05175801
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.28204011        0.27994829 0.84680190     139.91098 8.135163e-27
## sd      0.09375431        0.09371317 0.05458002      53.56275 2.424387e-25
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2954.9142 5927.8285 5979.7491    1697.9751     2358.000000
## sd   1.732262   151.5238  301.0411  295.4047     221.7289        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        0        1                    0
## sd          0           0        0        0                    0
##      significant_negative
## mean                    1
## sd                      0
plot_all_curves(lm_cases_0930, 'lm_cases_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_cases_0930_ctrl <- lm(case_rate_0930 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + case_rate_0930_lag, 
                             data = df_us_death_scaled)

summary(lm_cases_0930_ctrl)
## 
## Call:
## lm(formula = case_rate_0930 ~ airport_dist + conservative + male + 
##     age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + case_rate_0930_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5977 -0.4787 -0.1204  0.3630  6.7757 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.001975   0.015604  -0.127  0.89929    
## airport_dist        0.119021   0.018459   6.448 1.37e-10 ***
## conservative       -0.214833   0.019372 -11.090  < 2e-16 ***
## male                0.152308   0.017222   8.844  < 2e-16 ***
## age                -0.293421   0.016809 -17.457  < 2e-16 ***
## popdens             0.009099   0.017999   0.506  0.61323    
## manufact            0.173597   0.019327   8.982  < 2e-16 ***
## tourism            -0.021363   0.018148  -1.177  0.23923    
## academics          -0.107078   0.033555  -3.191  0.00144 ** 
## medinc             -0.025005   0.026524  -0.943  0.34592    
## healthcare          0.066311   0.020818   3.185  0.00147 ** 
## temp                0.483630   0.021645  22.344  < 2e-16 ***
## case_rate_0930_lag  0.226178   0.028937   7.816 8.14e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7589 on 2353 degrees of freedom
## Multiple R-squared:  0.427,  Adjusted R-squared:  0.4241 
## F-statistic: 146.1 on 12 and 2353 DF,  p-value: < 2.2e-16

Predict Deaths

lm_deaths_0415 <- spec_calculate(df = df_us_death_scaled, 
               y = "death_rate_0415", 
               model = "lm", 
               controls = covariates %>% append('death_rate_0415_lag'),
               all.comb = T)

calc_all_summaries(lm_deaths_0415)
## $pers_o
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.02159386 0.0232443310 0.9352266 0.3927249 -0.02398759 0.06717531
## sd   0.02622739 0.0008315074 1.1494376 0.3084098  0.02646229 0.02609246
##      fit_r.squared fit_adj.r.squared   fit_sigma fit_statistic  fit_p.value
## mean    0.06227496        0.05950046 0.969742909     22.662019 8.001974e-08
## sd      0.01965118        0.01924889 0.009912406      6.298663 2.447263e-06
##        fit_df fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.3844 6578.76886 6630.68946   2217.71972     2358.000000
## sd   1.732262    24.7346   47.20338   41.60315     46.47504        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.1784668 0.7832031 0.2167969            0.1784668
## sd          0   0.3829520 0.4121134 0.4121134            0.3829520
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##        estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.04372995 0.0207956843 2.1162652 0.1315361 0.002950226 0.08450968
## sd   0.01888260 0.0004178388 0.9341132 0.2095286 0.019457170 0.01832666
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.06352029        0.06074950 0.96909709     23.161227 6.476435e-06
## sd      0.01994210        0.01954641 0.01006702      6.585468 2.596939e-04
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3278.80427 6575.60854 6627.52914   2214.77451     2358.000000
## sd   1.732262    25.10753   47.97863   42.46171     47.16306        1.732262
##      fit_nobs significant   positive    negative significant_positive
## mean     2366   0.6140137 0.99804688 0.001953125            0.6140137
## sd          0   0.4868868 0.04415638 0.044156385            0.4868868
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##          estimate    std.error  statistic   p.value     conf.low  conf.high
## mean -0.009297587 0.0202759542 -0.4597789 0.6280569 -0.049058136 0.03046296
## sd    0.007132299 0.0002130225  0.3530586 0.1908608  0.007010183 0.00727638
##      fit_r.squared fit_adj.r.squared fit_sigma fit_statistic  fit_p.value
## mean    0.06151788        0.05874158 0.9701292     22.284747 0.0002391457
## sd      0.02056854        0.02016790 0.0103773      6.638708 0.0091204303
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3281.31517 6580.63034 6632.55094   2219.51021     2358.000000
## sd   1.732262    25.84772   49.42726   43.77151     48.64461        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366           0 0.08740234 0.9125977                    0
## sd          0           0 0.28245823 0.2824582                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic    p.value   conf.low  conf.high
## mean 0.08059878 0.0212479128  3.816159 0.01274672 0.03893225 0.12226531
## sd   0.02281645 0.0005825912  1.139402 0.04100308 0.02361434 0.02204891
##      fit_r.squared fit_adj.r.squared   fit_sigma fit_statistic  fit_p.value
## mean    0.06766920        0.06491029 0.966950106     24.935697 1.104777e-08
## sd      0.01948835        0.01910418 0.009860482      6.747553 4.400887e-07
##        fit_df  fit_logLik    fit_AIC    fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3273.56134 6565.12267 6617.04328   2204.96235     2358.000000
## sd   1.732262    24.64179   47.10499   41.80496     46.08995        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.9294434        1        0            0.9294434
## sd          0   0.2561141        0        0            0.2561141
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error  statistic   p.value    conf.low  conf.high
## mean -0.01275703 0.0215914703 -0.6063945 0.3820066 -0.05509727 0.02958321
## sd    0.02276319 0.0004845586  1.0615056 0.2740656  0.02216699 0.02338281
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.06198511        0.05920994 0.96988961     22.517221 1.683153e-06
## sd      0.02023050        0.01983336 0.01020926      6.553321 3.977900e-05
##        fit_df  fit_logLik    fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.73490 6579.46980 6631.3904   2218.40522     2358.000000
## sd   1.732262    25.44329   48.63943   43.0717     47.84512        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.06982422 0.2592773 0.7407227                    0
## sd          0  0.25488165 0.4382916 0.4382916                    0
##      significant_negative
## mean           0.06982422
## sd             0.25488165
plot_all_curves(lm_deaths_0415, 'lm_deaths_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_deaths_0415_ctrl <- lm(death_rate_0415 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + death_rate_0415_lag, 
                             data = df_us_death_scaled)

summary(lm_deaths_0415_ctrl)
## 
## Call:
## lm(formula = death_rate_0415 ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + death_rate_0415_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9826 -0.3573 -0.1845  0.0510 14.3516 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.002784   0.019506  -0.143  0.88650    
## airport_dist         0.030827   0.023066   1.336  0.18153    
## conservative        -0.164472   0.024153  -6.809 1.24e-11 ***
## male                -0.058699   0.021529  -2.727  0.00645 ** 
## age                  0.005348   0.021010   0.255  0.79911    
## popdens              0.149665   0.022492   6.654 3.53e-11 ***
## manufact             0.001234   0.024105   0.051  0.95919    
## tourism             -0.038348   0.022667  -1.692  0.09082 .  
## academics           -0.118379   0.041756  -2.835  0.00462 ** 
## medinc               0.167830   0.033048   5.078 4.11e-07 ***
## healthcare           0.023905   0.026021   0.919  0.35835    
## temp                 0.152491   0.025113   6.072 1.47e-09 ***
## death_rate_0415_lag  0.227223   0.034577   6.572 6.11e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9486 on 2353 degrees of freedom
## Multiple R-squared:  0.1048, Adjusted R-squared:  0.1002 
## F-statistic: 22.96 on 12 and 2353 DF,  p-value: < 2.2e-16
lm_deaths_0930 <- spec_calculate(df = df_us_death_scaled, 
               y = "death_rate_0930", 
               model = "lm", 
               controls = covariates %>% append('death_rate_0930_lag'),
               all.comb = T)

calc_all_summaries(lm_deaths_0930)
## $pers_o
##         estimate   std.error statistic   p.value    conf.low    conf.high
## mean -0.04771222 0.021786652 -2.278399 0.2033106 -0.09043520 -0.004989232
## sd    0.04891033 0.001143027  2.365694 0.2823974  0.04739848  0.050476466
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17340678        0.17098490 0.90938022      73.76279 0.0003284423
## sd      0.08211462        0.08202695 0.04520183      38.89783 0.0127438710
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.5125 6269.0249 6320.9455    1954.8930     2358.000000
## sd   1.732262   118.5453  235.2744  230.2473     194.2011        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.5175781 0.1660156 0.8339844           0.01733398
## sd          0   0.4997519 0.3721401 0.3721401           0.13052845
##      significant_negative
## mean            0.5002441
## sd              0.5000610
## 
## $pers_c
##       estimate   std.error statistic     p.value   conf.low  conf.high
## mean 0.1268692 0.019400070  6.486926 0.001113984 0.08882628 0.16491221
## sd   0.0530807 0.000431052  2.614407 0.003196235 0.05230910 0.05385451
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.18772875        0.18534406 0.90182774      81.06360 3.020223e-31
## sd      0.06656885        0.06643439 0.03691875      31.69162 9.761146e-30
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3106.72700 6231.4540 6283.3746    1921.0215     2358.000000
## sd   1.732262    97.68093  193.4857  188.3267     157.4353        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_e
##         estimate    std.error statistic   p.value    conf.low  conf.high
## mean 0.006814176 0.0190529070 0.3370688 0.5684355 -0.03054801 0.04417636
## sd   0.014399735 0.0008842485 0.7376599 0.2709864  0.01342749 0.01550550
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.17020480        0.16777296 0.91123005      71.84871 0.0002407612
## sd      0.07920734        0.07910241 0.04344308      36.74472 0.0097444833
##        fit_df fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3130.5609 6279.1219 6331.0425    1962.4657     2358.000000
## sd   1.732262   113.5638  225.2666  220.1163     187.3254        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.01293945 0.6718750 0.3281250           0.01293945
## sd          0  0.11302718 0.4695879 0.4695879           0.11302718
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_a
##        estimate    std.error statistic      p.value   conf.low  conf.high
## mean 0.20456071 0.0195666193 10.433477 1.450827e-09 0.16619114 0.24293027
## sd   0.05862264 0.0002714668  2.928455 1.173231e-08 0.05832843 0.05892019
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.21065181        0.20833117 0.88918037      93.99839 4.111013e-56
## sd      0.05703859        0.05688833 0.03205223      29.92537 1.256598e-54
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3073.75974 6165.5195 6217.4401    1866.8085     2358.000000
## sd   1.732262    86.01007  170.1551  165.0649     134.8963        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_n
##         estimate    std.error statistic    p.value    conf.low   conf.high
## mean -0.09751528 0.0201788237 -4.781985 0.06488922 -0.13708536 -0.05794521
## sd    0.04905087 0.0007283533  2.339181 0.18515988  0.04998104  0.04814510
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.18025226        0.17784851 0.90582179      76.95645 5.561893e-13
## sd      0.07348383        0.07335924 0.04048169      34.39706 2.643324e-11
##        fit_df fit_logLik   fit_AIC  fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3116.8080 6251.6161 6303.537    1938.7034     2358.000000
## sd   1.732262   106.3505  210.8025  205.555     173.7892        1.732262
##      fit_nobs significant   positive  negative significant_positive
## mean     2366   0.8454590 0.01293945 0.9870605                    0
## sd          0   0.3615107 0.11302718 0.1130272                    0
##      significant_negative
## mean            0.8454590
## sd              0.3615107
plot_all_curves(lm_deaths_0930, 'lm_deaths_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_deaths_0930_ctrl <- lm(death_rate_0930 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + death_rate_0930_lag, 
                             data = df_us_death_scaled)

summary(lm_deaths_0930_ctrl)
## 
## Call:
## lm(formula = death_rate_0930 ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + death_rate_0930_lag, data = df_us_death_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9380 -0.5061 -0.1812  0.2849  5.7208 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.001376   0.017228  -0.080  0.93636    
## airport_dist         0.094936   0.020382   4.658 3.37e-06 ***
## conservative        -0.326855   0.021335 -15.320  < 2e-16 ***
## male                -0.078391   0.019013  -4.123 3.87e-05 ***
## age                 -0.056769   0.018559  -3.059  0.00225 ** 
## popdens              0.087295   0.019869   4.394 1.16e-05 ***
## manufact             0.036678   0.021292   1.723  0.08509 .  
## tourism             -0.068205   0.020024  -3.406  0.00067 ***
## academics           -0.248127   0.036920  -6.721 2.26e-11 ***
## medinc               0.068018   0.029185   2.331  0.01986 *  
## healthcare           0.039224   0.022986   1.706  0.08805 .  
## temp                 0.453554   0.023106  19.630  < 2e-16 ***
## death_rate_0930_lag  0.171593   0.031177   5.504 4.12e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8379 on 2353 degrees of freedom
## Multiple R-squared:  0.3015, Adjusted R-squared:  0.2979 
## F-statistic: 84.64 on 12 and 2353 DF,  p-value: < 2.2e-16

Predict Socdist Onset

cox_onset_socdist <- spec_calculate(df = df_us_socdist_scaled, 
               y = "Surv(cpt_day_socdist_2, event)", 
               model = "cox_model", 
               controls = covariates %>% 
                 append(c('cpt_day_socdist_2_lag', 'onset_prev', 'slope_prev')),
               all.comb = T)

cox_onset_socdist <- cox_onset_socdist %>% map(filter_socdist)

cox_onset_socdist_hr <- cox_onset_socdist %>% map(coef_to_hr)

calc_all_summaries(cox_onset_socdist_hr)
## $pers_o
##        estimate    std.error statistic     p.value   conf.low  conf.high fit_n
## mean 0.90084718 0.0251620611 -4.186946 0.001514851 0.85747908 0.94641117  2366
## sd   0.02327209 0.0008332922  1.134641 0.004378419 0.02144995 0.02529138     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         234.86781    5.412147e-22        224.69959
## sd            0          66.65304    5.712933e-21         65.57137
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   1.657834e-20          223.08702      3.23802e-20                   NA
## sd     1.946635e-19           64.68873      4.20639e-19                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.09414009         0.9999987      0.63791890
## sd                   NA    0.02557470         0.0000000      0.01724889
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0071943487 -15894.97671 31807.95342 31859.87022     2366
## sd                0.0002517063     33.32652    64.80125    60.26247        0
##      significant positive negative significant_positive significant_negative
## mean  0.99926758        0        1                    0           0.99926758
## sd    0.02705668        0        0                    0           0.02705668
## 
## $pers_c
##       estimate    std.error statistic    p.value  conf.low  conf.high fit_n
## mean 0.9305632 0.0212930328 -3.439328 0.02976033 0.8924980 0.97025352  2366
## sd   0.0271874 0.0006426558  1.464538 0.04948005 0.0250966 0.02942282     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         230.04233    9.389246e-18        220.31793
## sd            0          65.03569    8.934878e-17         63.96218
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   9.055267e-17           218.1110     1.179495e-16                   NA
## sd     8.321553e-16            62.7894     1.056573e-15                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.09230748         0.9999987      0.63684492
## sd                   NA    0.02505988         0.0000000      0.01686053
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072149160 -15897.38945 31812.77891 31864.69571     2366
## sd                0.0002147792     32.51784    63.01764    57.96853        0
##      significant positive negative significant_positive significant_negative
## mean   0.7814941        0        1                    0            0.7814941
## sd     0.4132829        0        0                    0            0.4132829
## 
## $pers_e
##        estimate    std.error statistic   p.value   conf.low conf.high fit_n
## mean 1.02426578 0.0213783358 1.1179169 0.3310683 0.98223389 1.0680964  2366
## sd   0.01204374 0.0001072928 0.5488602 0.2332412 0.01152189 0.0125931     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         217.59045    4.479032e-13        207.16637
## sd            0          74.34435    3.637662e-12         73.62063
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   4.870524e-12          205.28108     6.898468e-12                   NA
## sd     3.612500e-11           72.55058     4.849513e-11                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.08741196         0.9999987      0.63445954
## sd                   NA    0.02878782         0.0000000      0.01893728
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072780187 -15903.61539 31825.23079 31877.14759     2366
## sd                0.0002501295     37.17218    72.42525    67.58321        0
##      significant positive negative significant_positive significant_negative
## mean  0.03979492        1        0           0.03979492                    0
## sd    0.19550094        0        0           0.19550094                    0
## 
## $pers_a
##        estimate    std.error statistic   p.value   conf.low  conf.high fit_n
## mean 1.00925711 0.0215170876 0.3206578 0.1308660 0.96752121 1.05279642  2366
## sd   0.04240161 0.0008720833 1.9443295 0.1687364 0.03927003 0.04575063     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         219.91522    3.098970e-13        209.08456
## sd            0          74.34997    2.452981e-12         72.81736
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   3.048073e-12          207.31414     4.251963e-12                   NA
## sd     2.230823e-11           71.92873     2.986865e-11                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.08830803         0.9999987      0.63489406
## sd                   NA    0.02877147         0.0000000      0.01796452
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072595031 -15902.45301 31822.90601 31874.82281     2366
## sd                0.0002430652     37.17499    72.45016    67.66904        0
##      significant  positive  negative significant_positive significant_negative
## mean   0.4101562 0.5031738 0.4968262            0.2971191            0.1130371
## sd     0.4919219 0.5000510 0.5000510            0.4570452            0.3166768
## 
## $pers_n
##        estimate    std.error statistic    p.value   conf.low  conf.high fit_n
## mean 1.07129629 0.0227821273  3.028251 0.05228809 1.02453397 1.12019435  2366
## sd   0.02975165 0.0005711819  1.264375 0.10005770 0.02922361 0.03029421     0
##      fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean       2365         226.77298    2.340417e-18        216.21052
## sd            0          66.03596    2.269295e-17         64.65633
##      fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean   1.974366e-17          213.98344     2.566388e-17                   NA
## sd     1.664079e-16           63.52385     2.065698e-16                   NA
##      fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean                 NA    0.09104196         0.9999987      0.63592775
## sd                   NA    0.02545169         0.0000000      0.01772905
##      fit_std.error.concordance   fit_logLik     fit_AIC     fit_BIC fit_nobs
## mean              0.0072763743 -15899.02413 31816.04825 31867.96505     2366
## sd                0.0002555284     33.01798    64.09478    59.27922        0
##      significant positive negative significant_positive significant_negative
## mean   0.7404785        1        0            0.7404785                    0
## sd     0.4384256        0        0            0.4384256                    0
plot_all_curves(cox_onset_socdist_hr, 'cox_onset_socdist_hr', hr = T)
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

cox_onset_socdist_ctrl <- coxph(Surv(cpt_day_socdist_2, event) ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + onset_prev + slope_prev +
                                 cpt_day_socdist_2_lag, 
                             data = df_us_socdist_scaled)

summary(cox_onset_socdist_ctrl)
## Call:
## coxph(formula = Surv(cpt_day_socdist_2, event) ~ airport_dist + 
##     conservative + male + age + popdens + manufact + tourism + 
##     academics + medinc + healthcare + temp + onset_prev + slope_prev + 
##     cpt_day_socdist_2_lag, data = df_us_socdist_scaled)
## 
##   n= 2366, number of events= 2365 
## 
##                           coef exp(coef) se(coef)       z Pr(>|z|)    
## airport_dist          -0.12768   0.88013  0.02573  -4.962 6.98e-07 ***
## conservative           0.09538   1.10007  0.02666   3.577 0.000347 ***
## male                  -0.05323   0.94816  0.02335  -2.279 0.022655 *  
## age                   -0.01756   0.98259  0.02205  -0.797 0.425615    
## popdens               -0.07439   0.92831  0.02794  -2.663 0.007750 ** 
## manufact               0.04968   1.05093  0.02457   2.022 0.043180 *  
## tourism               -0.11767   0.88899  0.02528  -4.655 3.24e-06 ***
## academics              0.02042   1.02063  0.04352   0.469 0.639026    
## medinc                -0.07733   0.92558  0.03477  -2.224 0.026136 *  
## healthcare             0.03931   1.04009  0.02802   1.403 0.160717    
## temp                  -0.29100   0.74752  0.02704 -10.761  < 2e-16 ***
## onset_prev            -0.03523   0.96539  0.03418  -1.031 0.302745    
## slope_prev            -0.11930   0.88754  0.03333  -3.580 0.000344 ***
## cpt_day_socdist_2_lag -0.23676   0.78918  0.04751  -4.983 6.25e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## airport_dist             0.8801     1.1362    0.8368    0.9257
## conservative             1.1001     0.9090    1.0441    1.1591
## male                     0.9482     1.0547    0.9057    0.9926
## age                      0.9826     1.0177    0.9410    1.0260
## popdens                  0.9283     1.0772    0.8789    0.9806
## manufact                 1.0509     0.9515    1.0015    1.1028
## tourism                  0.8890     1.1249    0.8460    0.9341
## academics                1.0206     0.9798    0.9372    1.1115
## medinc                   0.9256     1.0804    0.8646    0.9909
## healthcare               1.0401     0.9615    0.9845    1.0988
## temp                     0.7475     1.3378    0.7089    0.7882
## onset_prev               0.9654     1.0359    0.9028    1.0323
## slope_prev               0.8875     1.1267    0.8314    0.9474
## cpt_day_socdist_2_lag    0.7892     1.2671    0.7190    0.8662
## 
## Concordance= 0.661  (se = 0.007 )
## Likelihood ratio test= 344.5  on 14 df,   p=<2e-16
## Wald test            = 320.8  on 14 df,   p=<2e-16
## Score (logrank) test = 325.7  on 14 df,   p=<2e-16

Predict Socdist Mean

lm_mean_socdist <- spec_calculate(df = df_us_socdist_scaled, 
               y = "mean_diff_socdist_2", 
               model = "lm", 
               controls = covariates %>% 
                 append(c('mean_diff_socdist_2_lag', 'onset_prev', 'slope_prev')), 
               all.comb = T)

lm_mean_socdist <- lm_mean_socdist %>% map(filter_socdist)

calc_all_summaries(lm_mean_socdist)
## $pers_o
##        estimate    std.error statistic      p.value   conf.low  conf.high
## mean 0.14021047 0.0194929117  7.205979 8.705399e-07 0.10198542 0.17843551
## sd   0.03452777 0.0006951486  1.812431 7.476719e-06 0.03461717 0.03449206
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic   fit_p.value
## mean    0.35328573        0.35083507 0.80530040     146.96585 1.503377e-124
## sd      0.04184288        0.04168147 0.02561953      24.32935 4.585680e-123
##        fit_df fit_logLik  fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2838.6814 5699.363 5762.8213   1529.47925     2356.000000
## sd   1.732262    75.1761  148.104  141.8935     98.95842        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        1        0                    1
## sd          0           0        0        0                    0
##      significant_negative
## mean                    0
## sd                      0
## 
## $pers_c
##         estimate    std.error statistic      p.value    conf.low   conf.high
## mean -0.09138612 0.0175028708 -5.202841 0.0001219264 -0.12570875 -0.05706349
## sd    0.02382600 0.0005762093  1.266658 0.0005332005  0.02445777  0.02323204
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.3459287        0.34345301 0.80980432     141.97404 1.794927e-99
## sd       0.0455359        0.04536277 0.02764305      22.82046 5.855583e-98
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2851.70314 5725.4063 5788.8648    1546.8787     2356.000000
## sd   1.732262    80.44351  158.4916  151.8134     107.6924        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366           1        0        1                    0
## sd          0           0        0        0                    0
##      significant_negative
## mean                    1
## sd                      0
## 
## $pers_e
##          estimate    std.error  statistic   p.value    conf.low  conf.high
## mean -0.005150281 0.0170325770 -0.3254585 0.3943169 -0.03855068 0.02825012
## sd    0.018590856 0.0005492686  1.0726356 0.2595924  0.01788979 0.01932655
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.33818294        0.33567872 0.81453789     137.17003 1.467243e-86
## sd      0.04833435        0.04817127 0.02914621      23.33049 5.538707e-85
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2865.36119 5752.7224 5816.1809    1565.1974     2356.000000
## sd   1.732262    84.18746  166.0151  159.4343     114.3107        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366  0.04589844 0.2880859 0.7119141           0.03637695
## sd          0  0.20929038 0.4529266 0.4529266           0.18724911
##      significant_negative
## mean          0.009521484
## sd            0.097124295
## 
## $pers_a
##         estimate    std.error statistic    p.value    conf.low   conf.high
## mean -0.07066438 0.0179947707 -3.905691 0.02573413 -0.10595161 -0.03537715
## sd    0.03237484 0.0005958765  1.737321 0.07033431  0.03290196  0.03188185
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean    0.34307315        0.34058648 0.81157397     140.18752 5.652261e-98
## sd      0.04555493        0.04538166 0.02759323      22.68894 2.193977e-96
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2856.87873 5735.7575 5799.2160    1553.6320     2356.000000
## sd   1.732262    80.11993  157.8544  151.2078     107.7374        1.732262
##      fit_nobs significant positive negative significant_positive
## mean     2366   0.8581543        0        1                    0
## sd          0   0.3489344        0        0                    0
##      significant_negative
## mean            0.8581543
## sd              0.3489344
## 
## $pers_n
##        estimate    std.error statistic   p.value   conf.low  conf.high
## mean 0.05788521 0.0182294883  3.197587 0.0937796 0.02213770 0.09363271
## sd   0.03644144 0.0005360911  2.007429 0.2089676 0.03681758 0.03609200
##      fit_r.squared fit_adj.r.squared  fit_sigma fit_statistic  fit_p.value
## mean     0.3416623        0.33917212 0.81234918     139.36614 2.498468e-86
## sd       0.0500918        0.04993868 0.03028064      24.26272 9.696779e-85
##        fit_df  fit_logLik   fit_AIC   fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2858.86918 5739.7384 5803.1969    1556.9687     2356.000000
## sd   1.732262    87.63406  172.9206  166.3678     118.4671        1.732262
##      fit_nobs significant  positive  negative significant_positive
## mean     2366   0.7509766 0.8999023 0.1000977            0.7460938
## sd          0   0.4325002 0.3001668 0.3001668            0.4352977
##      significant_negative
## mean          0.004882812
## sd            0.069714828
plot_all_curves(lm_mean_socdist, 'lm_mean_socdist')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

lm_mean_socdist_ctrl <- lm(mean_diff_socdist_2 ~ airport_dist + 
                               conservative + male + age + popdens + manufact +
                               tourism + academics + medinc + healthcare + 
                               temp + onset_prev + slope_prev +
                                 mean_diff_socdist_2_lag,
                             data = df_us_socdist_scaled)

summary(lm_mean_socdist_ctrl)
## 
## Call:
## lm(formula = mean_diff_socdist_2 ~ airport_dist + conservative + 
##     male + age + popdens + manufact + tourism + academics + medinc + 
##     healthcare + temp + onset_prev + slope_prev + mean_diff_socdist_2_lag, 
##     data = df_us_socdist_scaled)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6309 -0.2614  0.0611  0.3676  2.6202 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.0023280  0.0157422   0.148  0.88245    
## airport_dist            -0.0964326  0.0186435  -5.172 2.51e-07 ***
## conservative            -0.1728838  0.0201691  -8.572  < 2e-16 ***
## male                    -0.0058333  0.0175253  -0.333  0.73928    
## age                      0.0833188  0.0171674   4.853 1.29e-06 ***
## popdens                  0.1215938  0.0184869   6.577 5.88e-11 ***
## manufact                -0.0236647  0.0195175  -1.212  0.22545    
## tourism                  0.0744721  0.0184003   4.047 5.35e-05 ***
## academics                0.0745873  0.0341493   2.184  0.02905 *  
## medinc                   0.2866240  0.0272018  10.537  < 2e-16 ***
## healthcare               0.0008116  0.0210337   0.039  0.96922    
## temp                     0.0572502  0.0208304   2.748  0.00603 ** 
## onset_prev              -0.0587622  0.0252012  -2.332  0.01980 *  
## slope_prev               0.0714963  0.0237801   3.007  0.00267 ** 
## mean_diff_socdist_2_lag  0.3108043  0.0273882  11.348  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7657 on 2351 degrees of freedom
## Multiple R-squared:  0.4172, Adjusted R-squared:  0.4138 
## F-statistic: 120.2 on 14 and 2351 DF,  p-value: < 2.2e-16

Save all results

results_us <- list(cox_onset_prev_hr, lm_slope_prev, lm_slope_prev_var, 
                   lm_cases_0415, lm_cases_0930, lm_deaths_0415, lm_deaths_0930, 
                   cox_onset_socdist_hr, lm_mean_socdist)

names(results_us) <- list('cox_onset_prev_hr', 'lm_slope_prev', 'lm_slope_prev_var', 
                   'lm_cases_0415', 'lm_cases_0930', 'lm_deaths_0415', 'lm_deaths_0930', 
                   'cox_onset_socdist_hr', 'lm_mean_socdist')

save(results_us, file="US_spec_results.RData")

Descriptives

Distributions

death_temp <- df_us_death_unscaled %>% select(county_fips:death_rate_0930)
onset_temp <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)

df_us_desc <- df_us_socdist_unscaled %>% 
  plyr::join(death_temp, by="county_fips") %>%
  dplyr::select(pers_o, pers_c, pers_e, pers_a, pers_n, 
                age, male, conservative, 
                academics, medinc, manufact, 
                airport_dist, tourism, healthcare, popdens,
                temp, onset_prev, slope_prev, 
                case_rate_0415, case_rate_0930, death_rate_0415, death_rate_0930,
                cpt_day_socdist_2, mean_diff_socdist_2)
          

df_us_desc %>% select(age) %>% summary()
##       age       
##  Min.   :22.90  
##  1st Qu.:37.70  
##  Median :40.70  
##  Mean   :40.59  
##  3rd Qu.:43.40  
##  Max.   :67.00
us_means <- df_us_desc %>% summarise_all(mean)
us_sd <- df_us_desc %>% summarise_all(sd)

desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame() 
names(desc_us) <- c('mean', 'sd')
desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us

Explore correlations

a <- df_us_socdist_unscaled %>% select(county_fips, pers_o:pers_n, cpt_day_socdist, mean_diff_socdist)
b <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)
c <- df_us_death_unscaled %>% select(county_fips, case_rate_0415:death_rate_0930)

us_joined <- plyr::join_all(list(a, b, c), by='county_fips')

us_joined %>% select(-county_fips) %>% cor(use = 'pairwise.complete')
##                        pers_o      pers_a      pers_e      pers_c      pers_n
## pers_o             1.00000000 -0.15281714 -0.08497946 -0.04802258 -0.22873070
## pers_a            -0.15281714  1.00000000  0.24480399  0.65337156 -0.39348271
## pers_e            -0.08497946  0.24480399  1.00000000  0.15985220 -0.39790623
## pers_c            -0.04802258  0.65337156  0.15985220  1.00000000 -0.40801964
## pers_n            -0.22873070 -0.39348271 -0.39790623 -0.40801964  1.00000000
## cpt_day_socdist    0.13992321  0.08453292  0.02403108  0.09085251 -0.12512306
## mean_diff_socdist  0.33391055 -0.23412700 -0.01723667 -0.21269607  0.06173468
## onset_prev        -0.29216003 -0.09682226 -0.07478377 -0.09611230  0.26005706
## slope_prev         0.19638054  0.12336567  0.06290779  0.09638924 -0.18203280
## case_rate_0415     0.14548085  0.07765531  0.05022219  0.04619685 -0.12545916
## death_rate_0415    0.08588663  0.09385457  0.02045067  0.05738798 -0.06854497
## case_rate_0930    -0.09448026  0.32267793  0.07255061  0.23696948 -0.20943857
## death_rate_0930   -0.01762812  0.31188291  0.02183955  0.22741935 -0.12688753
##                   cpt_day_socdist mean_diff_socdist  onset_prev  slope_prev
## pers_o                 0.13992321        0.33391055 -0.29216003  0.19638054
## pers_a                 0.08453292       -0.23412700 -0.09682226  0.12336567
## pers_e                 0.02403108       -0.01723667 -0.07478377  0.06290779
## pers_c                 0.09085251       -0.21269607 -0.09611230  0.09638924
## pers_n                -0.12512306        0.06173468  0.26005706 -0.18203280
## cpt_day_socdist        1.00000000        0.15266953 -0.14248381  0.19833550
## mean_diff_socdist      0.15266953        1.00000000 -0.26415088  0.27105720
## onset_prev            -0.14248381       -0.26415088  1.00000000 -0.72372322
## slope_prev             0.19833550        0.27105720 -0.72372322  1.00000000
## case_rate_0415         0.15305730        0.24381339 -0.47200894  0.86665276
## death_rate_0415        0.12568626        0.19568034 -0.39929052  0.75658883
## case_rate_0930         0.14915336       -0.40302394 -0.14180907  0.22824887
## death_rate_0930        0.20710211       -0.09734626 -0.27900660  0.51420879
##                   case_rate_0415 death_rate_0415 case_rate_0930 death_rate_0930
## pers_o                0.14548085      0.08588663    -0.09448026     -0.01762812
## pers_a                0.07765531      0.09385457     0.32267793      0.31188291
## pers_e                0.05022219      0.02045067     0.07255061      0.02183955
## pers_c                0.04619685      0.05738798     0.23696948      0.22741935
## pers_n               -0.12545916     -0.06854497    -0.20943857     -0.12688753
## cpt_day_socdist       0.15305730      0.12568626     0.14915336      0.20710211
## mean_diff_socdist     0.24381339      0.19568034    -0.40302394     -0.09734626
## onset_prev           -0.47200894     -0.39929052    -0.14180907     -0.27900660
## slope_prev            0.86665276      0.75658883     0.22824887      0.51420879
## case_rate_0415        1.00000000      0.80803632     0.19781085      0.46226490
## death_rate_0415       0.80803632      1.00000000     0.14786594      0.48273071
## case_rate_0930        0.19781085      0.14786594     1.00000000      0.59222976
## death_rate_0930       0.46226490      0.48273071     0.59222976      1.00000000
us_means <- us_joined %>% select(-county_fips) %>% summarise_all(mean)
us_sd <- us_joined %>% select(-county_fips) %>% summarise_all(sd)

desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame() %>%
  rename(mean=V1, sd=V2)

desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us  

Visualization

Example Plots

# get county names
counties <- read_csv('US_personality_2.csv') %>%
  select(county, county_name) %>%
  dplyr::rename(county_fips = county) %>%
  mutate(county_fips = as.character(county_fips)) %>%
  distinct()

df_us_prev <- read_csv('US_prevalence.csv')

df_us_prev <- df_us_prev %>% 
  select(fips, date, rate) %>% 
  mutate(date = as.Date(date, "%d%b%Y")) %>% 
  dplyr::rename(county_fips = fips, 
         rate_day = rate) %>%
  mutate(county_fips = as.character(county_fips)) %>% 
  filter(rate_day >= 0 & rate_day <= 1000)

# merge with processed data frames
df_us_prev <- df_us_prev %>% inner_join(counties, by = 'county_fips')
df_us_socdist <- df_us_socdist %>% inner_join(counties, by = 'county_fips')
gg_prev <- df_us_prev %>% 
  filter(county_fips == '8037' | county_fips == '19079') %>%
  filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
  mutate(window = ifelse(date >= '2020-03-15' & date <= '2020-04-15', 
                         'in window', 'out of window')) %>%
  ggplot(aes(x=date, y=rate_day)) + 
  geom_point(aes(col=county_name)) + 
  geom_line(aes(col=county_name)) + 
  theme_light() +
  theme(legend.position="bottom", legend.margin=margin(t=-20)) +
  guides(color=guide_legend(title="")) +
  annotate('rect', xmin = as.Date('2020-03-15'), 
           xmax = as.Date('2020-04-15'), 
           ymin = -Inf, ymax = Inf, alpha = 0.07) +
  geom_segment(aes(x = as.Date('2020-04-07'), y = 0, 
                   xend = as.Date('2020-04-07'), yend = 1), 
               colour = "#00BFC4") +
  geom_segment(aes(x = as.Date('2020-03-09'), y = 0, 
                   xend = as.Date('2020-03-09'), yend = 1), 
               colour = "#F8766D") +
  annotate("text", x = as.Date('2020-04-07'), 
           y = 2, label = "Onset \n Hamilton County", color = "#00BFC4", size = 3) +
  annotate("text", x = as.Date('2020-03-09'), 
           y = 2, label = "Onset \n Eagle County", color = "#F8766D", size = 3) +
  annotate("text", x = as.Date('2020-03-31'), 
           y = 9.5, label = "Time window used for\nanalysis of growth rates", 
           color = "grey50", size = 3) +
  ylab('Prevalence') +
  xlab('') +
  ylim(c(-1,10)) +
  ggtitle("COVID-19 - comparison of onsets and \n growth rates in two counties") +
  theme(plot.title = element_text(size=11)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())

gg_prev

gg_socdist <- df_us_socdist %>% 
  filter(county_fips == 19137 | county_fips == 36081) %>%
  filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
  ggplot(aes(x=date, y=socdist_tiles)) + 
  #facet_wrap(~countyname) +
  geom_point(aes(col=county_name)) + 
  geom_line(aes(col=county_name)) + 
  theme_light() +
  theme(legend.position="bottom",legend.margin=margin(t=-20)) +
  guides(color=guide_legend(title="")) +
    geom_segment(aes(x = as.Date('2020-03-23'), y = -.6, 
                   xend = as.Date('2020-03-23'), yend = 0), 
               colour = "#00BFC4") +
  geom_segment(aes(x = as.Date('2020-03-23'), y = -.6, 
                   xend = as.Date('2020-04-28'), yend = -.6), 
               colour = "#00BFC4") +
  geom_segment(aes(x = as.Date('2020-03-14'), y = -.156, 
                   xend = as.Date('2020-03-14'), yend = .17), 
               colour = "#F8766D") +
  geom_segment(aes(x = as.Date('2020-03-14'), y = -.156, 
                   xend = as.Date('2020-04-28'), yend = -.156), 
               colour = "#F8766D") +
  annotate("text", x = as.Date('2020-03-26'), 
           y = .08, label = "Change point\nQueens County", color = "#00BFC4", size = 3) +
  annotate("text", x = as.Date('2020-03-16'), 
           y = .25, label = "Change point\nMontgommery County", color = "#F8766D", size = 3) +
  ylab('Social distancing') +
  xlab("") +
  ylim(c(-.7,.3)) +
  ggtitle("Social distancing - comparison of change \n points and means in two counties") + 
  theme(plot.title = element_text(size=11)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())

gg_socdist

figure <- ggarrange(gg_prev, gg_socdist,
                    labels = c("A", "B"),
                    ncol = 2, nrow = 1)
figure

# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
                          max(df_us_prev$date), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

tto <- df_us_prev_unscaled %>%
  merge(df_dates, by.x = 'onset_prev', by.y = 'time') %>%
  filter(event==1) %>%
  ggplot(aes(x=date+1)) +
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - COVID-19 Onset') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Date of March')
  
soa <- df_us_slope_prev %>% 
  ggplot(aes(x=slope_prev)) +
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - COVID-19 Growth Rates') +
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Standardized growth rates')


figure <- ggarrange(tto, soa,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
figure

tto <- df_us_death_unscaled %>% 
  ggplot(aes(x=case_rate_0930)) + 
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Cumulative Case Rates') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Cases Per 1000 Inhabitants')
  
soa <- df_us_death_unscaled %>%
  ggplot(aes(x=death_rate_0930)) +
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Cumulative Death Rates') +
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Deaths Per 1000 Inhabitants')


figure <- ggarrange(tto, soa,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
figure

# create sequence of dates
date_sequence <- seq.Date(min(df_us_socdist$date),
                          max(df_us_socdist$date), 1)
                     
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

tto <- df_us_cpt_socdist %>% 
  merge(df_dates, by.x = 'cpt_day_socdist_2', by.y = 'time') %>%
  filter(cpt_day_socdist_2 < 30) %>% 
  ggplot(aes(x=date+1)) + 
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Time to Adoption') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Date') +
  xlim(c(as.Date('2020-03-01'), as.Date('2020-03-31')))

soa <- df_us_cpt_socdist %>% 
  ggplot(aes(x=mean_diff_socdist_2)) + 
  geom_histogram(bins=15) +
  theme_light() +
  ggtitle('Distribution - Strength of Adjustment') + 
  theme(plot.title = element_text(size=11)) +
  ylab('Absolute frequency') +
  xlab('Standardized mean difference') 


figure <- ggarrange(tto, soa,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
figure

Robustness check FB mobility data

df_us_google <- read_csv("../Google/2020_US_Region_Mobility_Report.csv")

df_us_fb <- df_us_socdist %>%
  select(county_fips, date, socdist_tiles, socdist_single_tile)

df_us_ggl <- df_us_google %>% 
  select(census_fips_code, date,
         retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline) %>%
  drop_na() %>%
  mutate(county_fips = as.character(as.numeric(census_fips_code))) %>%
  select(-census_fips_code)

df_us_fb_ggl <- df_us_fb %>% plyr::join(df_us_ggl, c('county_fips', 'date')) %>% drop_na()

df_us_fb_ggl %>% split(.$county_fips) %>%
  map(~ map_if(., is.numeric, scale)) %>%
  bind_rows() %>%
  select(socdist_tiles:residential_percent_change_from_baseline) %>%
  drop_na() %>%
  cor() %>% 
  as.data.frame() %>%
  select(c(1,2)) %>% round(3) %>% write.csv()
## "","socdist_tiles","socdist_single_tile"
## "socdist_tiles",1,-0.971
## "socdist_single_tile",-0.971,1
## "retail_and_recreation_percent_change_from_baseline",0.945,-0.939
## "grocery_and_pharmacy_percent_change_from_baseline",0.766,-0.795
## "parks_percent_change_from_baseline",0.44,-0.442
## "transit_stations_percent_change_from_baseline",0.889,-0.888
## "workplaces_percent_change_from_baseline",0.919,-0.925
## "residential_percent_change_from_baseline",-0.906,0.897

Visualize distribution of effect sizes

load("US_spec_results.RData")
load("../GER/GER_spec_results.RData")
plot_dist <- function(df, filename){
  
  w <- 5
  h <- 5
  
  file_path_eps <- paste0("../../Plots/COMP/", filename,".eps")
  file_path_pdf <- paste0("../../Plots/COMP/", filename,".pdf")
  file_path_png <- paste0("../../Plots/COMP/", filename,".png")
  
    
      ggplot(df, aes(x=estimate, fill=country), alpha=.3) + 
        geom_histogram(alpha=.5, position="identity") +
      #coord_fixed(ratio = 0.1) +
        theme_bw() +
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), 
            axis.line = element_line(colour = "black")) +
        theme(axis.title.x=element_blank(),
              axis.text.y=element_blank(),
              axis.title.y=element_blank()) +
        theme(legend.position="none") +
      ggsave(file=file_path_eps, device = 'eps', width=w, height=h)+
      ggsave(file=file_path_pdf, device = 'pdf', width=w, height=h)+
      ggsave(file=file_path_png, device = 'png', width=w, height=h)
    
    
}

# define function to plot all curves in list
plot_all_dist <- function(ls, analysis, hr=F){
  
  pers <- c('o', 'c', 'e', 'a', 'n')
  filenames <- as.list(paste0(analysis, '_', pers))
  pmap(list(ls, filenames), plot_dist)
}
join_results <- function(list_us, list_ger){
  
  US = "US"
  GER = "GER"

  list_us <- list_us %>% 
    map(~cbind(.x, US) %>% rename(country=US))
  list_ger <- list_ger %>% 
    map(~cbind(.x, GER) %>% rename(country=GER))
  
  map2(list_us, list_ger, rbind)
  
}
join_results(results_us$cox_onset_prev_hr, results_ger$cox_onset_prev_hr) %>% 
  plot_all_dist(analysis = 'cox_onset_prev_hr')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_slope_prev, results_ger$lm_slope_prev) %>% 
  plot_all_dist(analysis = 'lm_slope_prev')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_slope_prev_var, results_ger$lm_slope_prev_var) %>% 
  plot_all_dist(analysis = 'lm_slope_prev_var')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_cases_0415, results_ger$lm_cases_0331) %>% 
  plot_all_dist(analysis = 'lm_cases_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_cases_0930, results_ger$lm_cases_0930) %>% 
  plot_all_dist(analysis = 'lm_cases_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_deaths_0415, results_ger$lm_deaths_0331) %>% 
  plot_all_dist(analysis = 'lm_deaths_0415')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_deaths_0930, results_ger$lm_deaths_0930) %>% 
  plot_all_dist(analysis = 'lm_deaths_0930')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$cox_onset_socdist_hr, results_ger$cox_onset_socdist_hr) %>% 
  plot_all_dist(analysis = 'cox_onset_socdist_hr')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

join_results(results_us$lm_mean_socdist, results_ger$lm_mean_socdist) %>% 
  plot_all_dist(analysis = 'lm_mean_socdist')
## $spec_results_o

## 
## $spec_results_c

## 
## $spec_results_e

## 
## $spec_results_a

## 
## $spec_results_n

calculate_effsize <- function(df){
  df %>% summarise(d = effsize::cohen.d(estimate ~ country)$estimate, 
                   pval = t.test(estimate ~ country, var.equal = TRUE)$p.value)
}

all_ttest <- map2(results_us, results_ger, join_results) %>% flatten() %>%
  map(calculate_effsize) %>% bind_rows(.id = "model")
models <- rep(names(results_us), each=5)
all_ttest <- cbind(models, all_ttest)

all_ttest
save(all_ttest, file='ttest_results.RData')
>>>>>>> f9bb205265c8cd2a54b17b2b2634fb89e3d3c38f